diff --git a/doc/source/api.rst b/doc/source/api.rst index 8dcf9c0f52de4..f74f5f0d28a58 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -275,12 +275,30 @@ Binary operator functions :toctree: generated/ Series.add - Series.div - Series.mul Series.sub + Series.mul + Series.div + Series.truediv + Series.floordiv + Series.mod + Series.pow + Series.radd + Series.rsub + Series.rmul + Series.rdiv + Series.rtruediv + Series.rfloordiv + Series.rmod + Series.rpow Series.combine Series.combine_first Series.round + Series.lt + Series.gt + Series.le + Series.ge + Series.ne + Series.eq Function application, GroupBy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -480,13 +498,27 @@ Binary operator functions :toctree: generated/ DataFrame.add - DataFrame.div - DataFrame.mul DataFrame.sub + DataFrame.mul + DataFrame.div + DataFrame.truediv + DataFrame.floordiv + DataFrame.mod + DataFrame.pow DataFrame.radd - DataFrame.rdiv - DataFrame.rmul DataFrame.rsub + DataFrame.rmul + DataFrame.rdiv + DataFrame.rtruediv + DataFrame.rfloordiv + DataFrame.rmod + DataFrame.rpow + DataFrame.lt + DataFrame.gt + DataFrame.le + DataFrame.ge + DataFrame.ne + DataFrame.eq DataFrame.combine DataFrame.combineAdd DataFrame.combine_first @@ -710,9 +742,27 @@ Binary operator functions :toctree: generated/ Panel.add - Panel.div - Panel.mul Panel.sub + Panel.mul + Panel.div + Panel.truediv + Panel.floordiv + Panel.mod + Panel.pow + Panel.radd + Panel.rsub + Panel.rmul + Panel.rdiv + Panel.rtruediv + Panel.rfloordiv + Panel.rmod + Panel.rpow + Panel.lt + Panel.gt + Panel.le + Panel.ge + Panel.ne + Panel.eq Function application, GroupBy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/release.rst b/doc/source/release.rst index 1f0e447429d6a..73e7e3affd944 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -263,6 +263,10 @@ API Changes - Begin removing methods that don't make sense on ``GroupBy`` objects (:issue:`4887`). - Remove deprecated ``read_clipboard/to_clipboard/ExcelFile/ExcelWriter`` from ``pandas.io.parsers`` (:issue:`3717`) + - All non-Index NDFrames (``Series``, ``DataFrame``, ``Panel``, ``Panel4D``, + ``SparsePanel``, etc.), now support the entire set of arithmetic operators + and arithmetic flex methods (add, sub, mul, etc.). ``SparsePanel`` does not + support ``pow`` or ``mod`` with non-scalars. (:issue:`3765`) Internal Refactoring ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index c7f80a49b9b6c..0796f34ead839 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -68,6 +68,11 @@ API changes df1 and df2 s1 and s2 + - All non-Index NDFrames (``Series``, ``DataFrame``, ``Panel``, ``Panel4D``, + ``SparsePanel``, etc.), now support the entire set of arithmetic operators + and arithmetic flex methods (add, sub, mul, etc.). ``SparsePanel`` does not + support ``pow`` or ``mod`` with non-scalars. 
(:issue:`3765`) + Prior Version Deprecations/Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py index 45c9a2d5259cb..3c1fb091ab823 100644 --- a/pandas/computation/expressions.py +++ b/pandas/computation/expressions.py @@ -15,6 +15,8 @@ except ImportError: # pragma: no cover _NUMEXPR_INSTALLED = False +_TEST_MODE = None +_TEST_RESULT = None _USE_NUMEXPR = _NUMEXPR_INSTALLED _evaluate = None _where = None @@ -55,9 +57,10 @@ def set_numexpr_threads(n=None): def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs): """ standard evaluation """ + if _TEST_MODE: + _store_test_result(False) return op(a, b) - def _can_use_numexpr(op, op_str, a, b, dtype_check): """ return a boolean if we WILL be using numexpr """ if op_str is not None: @@ -88,11 +91,8 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, **eval_kwargs): if _can_use_numexpr(op, op_str, a, b, 'evaluate'): try: - a_value, b_value = a, b - if hasattr(a_value, 'values'): - a_value = a_value.values - if hasattr(b_value, 'values'): - b_value = b_value.values + a_value = getattr(a, "values", a) + b_value = getattr(b, "values", b) result = ne.evaluate('a_value %s b_value' % op_str, local_dict={'a_value': a_value, 'b_value': b_value}, @@ -104,6 +104,9 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, **eval_kwargs): if raise_on_error: raise + if _TEST_MODE: + _store_test_result(result is not None) + if result is None: result = _evaluate_standard(op, op_str, a, b, raise_on_error) @@ -119,13 +122,9 @@ def _where_numexpr(cond, a, b, raise_on_error=False): if _can_use_numexpr(None, 'where', a, b, 'where'): try: - cond_value, a_value, b_value = cond, a, b - if hasattr(cond_value, 'values'): - cond_value = cond_value.values - if hasattr(a_value, 'values'): - a_value = a_value.values - if hasattr(b_value, 'values'): - b_value = b_value.values + cond_value = getattr(cond, 'values', cond) + a_value = getattr(a, 'values', a) + b_value = getattr(b, 'values', b) result = ne.evaluate('where(cond_value, a_value, b_value)', local_dict={'cond_value': cond_value, 'a_value': a_value, @@ -189,3 +188,28 @@ def where(cond, a, b, raise_on_error=False, use_numexpr=True): if use_numexpr: return _where(cond, a, b, raise_on_error=raise_on_error) return _where_standard(cond, a, b, raise_on_error=raise_on_error) + + +def set_test_mode(v = True): + """ + Keeps track of whether numexpr was used. 
Stores an additional ``True`` for + every successful use of evaluate with numexpr since the last + ``get_test_result`` + """ + global _TEST_MODE, _TEST_RESULT + _TEST_MODE = v + _TEST_RESULT = [] + + +def _store_test_result(used_numexpr): + global _TEST_RESULT + if used_numexpr: + _TEST_RESULT.append(used_numexpr) + + +def get_test_result(): + """get test result and reset test_results""" + global _TEST_RESULT + res = _TEST_RESULT + _TEST_RESULT = [] + return res diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index e9201c233753f..aa5c0cc5d50f6 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -2,9 +2,7 @@ import unittest import functools -import numbers from itertools import product -import ast import nose from nose.tools import assert_raises, assert_true, assert_false, assert_equal @@ -250,12 +248,6 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): not np.isscalar(rhs_new) and binop in skip_these): with tm.assertRaises(TypeError): _eval_single_bin(lhs_new, binop, rhs_new, self.engine) - elif _bool_and_frame(lhs_new, rhs_new): - with tm.assertRaises(TypeError): - _eval_single_bin(lhs_new, binop, rhs_new, self.engine) - with tm.assertRaises(TypeError): - pd.eval('lhs_new & rhs_new'.format(binop), - engine=self.engine, parser=self.parser) else: expected = _eval_single_bin(lhs_new, binop, rhs_new, self.engine) result = pd.eval(ex, engine=self.engine, parser=self.parser) @@ -301,28 +293,15 @@ def check_operands(left, right, cmp_op): rhs_new = check_operands(mid, rhs, cmp2) if lhs_new is not None and rhs_new is not None: - # these are not compatible operands - if isinstance(lhs_new, Series) and isinstance(rhs_new, DataFrame): - self.assertRaises(TypeError, _eval_single_bin, lhs_new, '&', - rhs_new, self.engine) - elif (_bool_and_frame(lhs_new, rhs_new)): - self.assertRaises(TypeError, _eval_single_bin, lhs_new, '&', - rhs_new, self.engine) - elif _series_and_2d_ndarray(lhs_new, rhs_new): - # TODO: once #4319 is fixed add this test back in - #self.assertRaises(Exception, _eval_single_bin, lhs_new, '&', - #rhs_new, self.engine) - pass - else: - ex1 = 'lhs {0} mid {1} rhs'.format(cmp1, cmp2) - ex2 = 'lhs {0} mid and mid {1} rhs'.format(cmp1, cmp2) - ex3 = '(lhs {0} mid) & (mid {1} rhs)'.format(cmp1, cmp2) - expected = _eval_single_bin(lhs_new, '&', rhs_new, self.engine) - - for ex in (ex1, ex2, ex3): - result = pd.eval(ex, engine=self.engine, - parser=self.parser) - assert_array_equal(result, expected) + ex1 = 'lhs {0} mid {1} rhs'.format(cmp1, cmp2) + ex2 = 'lhs {0} mid and mid {1} rhs'.format(cmp1, cmp2) + ex3 = '(lhs {0} mid) & (mid {1} rhs)'.format(cmp1, cmp2) + expected = _eval_single_bin(lhs_new, '&', rhs_new, self.engine) + + for ex in (ex1, ex2, ex3): + result = pd.eval(ex, engine=self.engine, + parser=self.parser) + assert_array_equal(result, expected) @skip_incompatible_operand def check_simple_cmp_op(self, lhs, cmp1, rhs): diff --git a/pandas/core/common.py b/pandas/core/common.py index d3fa10abc7681..2c5ca42c7be86 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -8,6 +8,7 @@ import codecs import csv import sys +import types from datetime import timedelta @@ -27,6 +28,7 @@ from pandas.core.config import get_option from pandas.core import array as pa + class PandasError(Exception): pass @@ -74,6 +76,31 @@ def __instancecheck__(cls, inst): ABCGeneric = _ABCGeneric("ABCGeneric", tuple(), {}) + +def bind_method(cls, name, func): + """Bind a method to class, python 2 and 
python 3 compatible. + + Parameters + ---------- + + cls : type + class to receive bound method + name : basestring + name of method on class instance + func : function + function to be bound as method + + + Returns + ------- + None + """ + # only python 2 has bound/unbound method issue + if not compat.PY3: + setattr(cls, name, types.MethodType(func, None, cls)) + else: + setattr(cls, name, func) + def isnull(obj): """Detect missing values (NaN in numeric arrays, None/NaN in object arrays) @@ -360,10 +387,10 @@ def _take_2d_multi_generic(arr, indexer, out, fill_value, mask_info): if col_needs: out[:, col_mask] = fill_value for i in range(len(row_idx)): - u = row_idx[i] + u_ = row_idx[i] for j in range(len(col_idx)): v = col_idx[j] - out[i, j] = arr[u, v] + out[i, j] = arr[u_, v] def _take_nd_generic(arr, indexer, out, axis, fill_value, mask_info): @@ -2348,3 +2375,10 @@ def save(obj, path): # TODO remove in 0.13 warnings.warn("save is deprecated, use obj.to_pickle", FutureWarning) from pandas.io.pickle import to_pickle return to_pickle(obj, path) + + +def _maybe_match_name(a, b): + name = None + if a.name == b.name: + name = a.name + return name diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 01e0d74ef8ce6..c6727f91644fc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -34,7 +34,7 @@ from pandas.core.internals import (BlockManager, create_block_manager_from_arrays, create_block_manager_from_blocks) -from pandas.core.series import Series, _radd_compat +from pandas.core.series import Series import pandas.computation.expressions as expressions from pandas.computation.eval import eval as _eval from pandas.computation.expr import _ensure_scope @@ -42,7 +42,6 @@ from pandas.compat import(range, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) from pandas import compat -from pandas.util.terminal import get_terminal_size from pandas.util.decorators import deprecate, Appender, Substitution from pandas.tseries.period import PeriodIndex @@ -53,6 +52,7 @@ import pandas.core.common as com import pandas.core.format as fmt import pandas.core.nanops as nanops +import pandas.core.ops as ops import pandas.lib as lib import pandas.algos as _algos @@ -62,31 +62,6 @@ #---------------------------------------------------------------------- # Docstring templates -_arith_doc = """ -Binary operator %s with support to substitute a fill_value for missing data in -one of the inputs - -Parameters ----------- -other : Series, DataFrame, or constant -axis : {0, 1, 'index', 'columns'} - For Series input, axis to match Series index on -fill_value : None or float value, default None - Fill missing (NaN) values with this value. If both DataFrame locations are - missing, the result will be missing -level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - -Notes ------ -Mismatched indices will be unioned together - -Returns -------- -result : DataFrame -""" - _stat_doc = """ Return %(name)s over requested axis. 
@@ -181,153 +156,6 @@ merged : DataFrame """ -#---------------------------------------------------------------------- -# Factory helper methods - - -def _arith_method(op, name, str_rep=None, default_axis='columns', fill_zeros=None, **eval_kwargs): - def na_op(x, y): - try: - result = expressions.evaluate( - op, str_rep, x, y, raise_on_error=True, **eval_kwargs) - result = com._fill_zeros(result, y, fill_zeros) - - except TypeError: - xrav = x.ravel() - result = np.empty(x.size, dtype=x.dtype) - if isinstance(y, (np.ndarray, Series)): - yrav = y.ravel() - mask = notnull(xrav) & notnull(yrav) - result[mask] = op(xrav[mask], yrav[mask]) - else: - mask = notnull(xrav) - result[mask] = op(xrav[mask], y) - - result, changed = com._maybe_upcast_putmask(result, -mask, np.nan) - result = result.reshape(x.shape) - - return result - - @Appender(_arith_doc % name) - def f(self, other, axis=default_axis, level=None, fill_value=None): - if isinstance(other, DataFrame): # Another DataFrame - return self._combine_frame(other, na_op, fill_value, level) - elif isinstance(other, Series): - return self._combine_series(other, na_op, fill_value, axis, level) - elif isinstance(other, (list, tuple)): - if axis is not None and self._get_axis_name(axis) == 'index': - casted = Series(other, index=self.index) - else: - casted = Series(other, index=self.columns) - return self._combine_series(casted, na_op, fill_value, axis, level) - elif isinstance(other, np.ndarray): - if other.ndim == 1: - if axis is not None and self._get_axis_name(axis) == 'index': - casted = Series(other, index=self.index) - else: - casted = Series(other, index=self.columns) - return self._combine_series(casted, na_op, fill_value, - axis, level) - elif other.ndim == 2: - casted = DataFrame(other, index=self.index, - columns=self.columns) - return self._combine_frame(casted, na_op, fill_value, level) - else: - raise ValueError("Incompatible argument shape %s" % (other.shape,)) - else: - return self._combine_const(other, na_op) - - f.__name__ = name - - return f - - -def _flex_comp_method(op, name, str_rep=None, default_axis='columns'): - - def na_op(x, y): - try: - result = op(x, y) - except TypeError: - xrav = x.ravel() - result = np.empty(x.size, dtype=x.dtype) - if isinstance(y, (np.ndarray, Series)): - yrav = y.ravel() - mask = notnull(xrav) & notnull(yrav) - result[mask] = op(np.array(list(xrav[mask])), - np.array(list(yrav[mask]))) - else: - mask = notnull(xrav) - result[mask] = op(np.array(list(xrav[mask])), y) - - if op == operator.ne: # pragma: no cover - np.putmask(result, -mask, True) - else: - np.putmask(result, -mask, False) - result = result.reshape(x.shape) - - return result - - @Appender('Wrapper for flexible comparison methods %s' % name) - def f(self, other, axis=default_axis, level=None): - if isinstance(other, DataFrame): # Another DataFrame - return self._flex_compare_frame(other, na_op, str_rep, level) - - elif isinstance(other, Series): - return self._combine_series(other, na_op, None, axis, level) - - elif isinstance(other, (list, tuple)): - if axis is not None and self._get_axis_name(axis) == 'index': - casted = Series(other, index=self.index) - else: - casted = Series(other, index=self.columns) - - return self._combine_series(casted, na_op, None, axis, level) - - elif isinstance(other, np.ndarray): - if other.ndim == 1: - if axis is not None and self._get_axis_name(axis) == 'index': - casted = Series(other, index=self.index) - else: - casted = Series(other, index=self.columns) - - return self._combine_series(casted, 
na_op, None, axis, level) - - elif other.ndim == 2: - casted = DataFrame(other, index=self.index, - columns=self.columns) - - return self._flex_compare_frame(casted, na_op, str_rep, level) - - else: - raise ValueError("Incompatible argument shape: %s" % - (other.shape,)) - - else: - return self._combine_const(other, na_op) - - f.__name__ = name - - return f - - -def _comp_method(func, name, str_rep): - @Appender('Wrapper for comparison method %s' % name) - def f(self, other): - if isinstance(other, DataFrame): # Another DataFrame - return self._compare_frame(other, func, str_rep) - elif isinstance(other, Series): - return self._combine_series_infer(other, func) - else: - - # straight boolean comparisions we want to allow all columns - # (regardless of dtype to pass thru) - return self._combine_const(other, func, raise_on_error=False).fillna(True).astype(bool) - - f.__name__ = name - - return f - - #---------------------------------------------------------------------- # DataFrame class @@ -752,79 +580,6 @@ def __len__(self): """Returns length of info axis, but here we use the index """ return len(self.index) - #---------------------------------------------------------------------- - # Arithmetic methods - - add = _arith_method(operator.add, 'add', '+') - mul = _arith_method(operator.mul, 'multiply', '*') - sub = _arith_method(operator.sub, 'subtract', '-') - div = divide = _arith_method(lambda x, y: x / y, 'divide', '/') - pow = _arith_method(operator.pow, 'pow', '**') - mod = _arith_method(lambda x, y: x % y, 'mod') - - radd = _arith_method(_radd_compat, 'radd') - rmul = _arith_method(operator.mul, 'rmultiply') - rsub = _arith_method(lambda x, y: y - x, 'rsubtract') - rdiv = _arith_method(lambda x, y: y / x, 'rdivide') - rpow = _arith_method(lambda x, y: y ** x, 'rpow') - rmod = _arith_method(lambda x, y: y % x, 'rmod') - - __add__ = _arith_method(operator.add, '__add__', '+', default_axis=None) - __sub__ = _arith_method(operator.sub, '__sub__', '-', default_axis=None) - __mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None) - __truediv__ = _arith_method(operator.truediv, '__truediv__', '/', - default_axis=None, fill_zeros=np.inf, truediv=True) - # numexpr produces a different value (python/numpy: 0.000, numexpr: inf) - # when dividing by zero, so can't use floordiv speed up (yet) - # __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', '//', - __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', - default_axis=None, fill_zeros=np.inf) - __pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None) - - # currently causes a floating point exception to occur - so sticking with unaccelerated for now - # __mod__ = _arith_method(operator.mod, '__mod__', '%', default_axis=None, fill_zeros=np.nan) - __mod__ = _arith_method( - operator.mod, '__mod__', default_axis=None, fill_zeros=np.nan) - - __radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None) - __rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None) - __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__', default_axis=None) - __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__', - default_axis=None, fill_zeros=np.inf) - __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__', - default_axis=None, fill_zeros=np.inf) - __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__', - default_axis=None) - __rmod__ = _arith_method(lambda x, y: y % x, '__rmod__', default_axis=None, - fill_zeros=np.nan) - - # boolean operators - __and__ = 
_arith_method(operator.and_, '__and__', '&') - __or__ = _arith_method(operator.or_, '__or__', '|') - __xor__ = _arith_method(operator.xor, '__xor__') - - # Python 2 division methods - if not compat.PY3: - __div__ = _arith_method(operator.div, '__div__', '/', - default_axis=None, fill_zeros=np.inf, truediv=False) - __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', - default_axis=None, fill_zeros=np.inf) - - # Comparison methods - __eq__ = _comp_method(operator.eq, '__eq__', '==') - __ne__ = _comp_method(operator.ne, '__ne__', '!=') - __lt__ = _comp_method(operator.lt, '__lt__', '<') - __gt__ = _comp_method(operator.gt, '__gt__', '>') - __le__ = _comp_method(operator.le, '__le__', '<=') - __ge__ = _comp_method(operator.ge, '__ge__', '>=') - - eq = _flex_comp_method(operator.eq, 'eq', '==') - ne = _flex_comp_method(operator.ne, 'ne', '!=') - lt = _flex_comp_method(operator.lt, 'lt', '<') - gt = _flex_comp_method(operator.gt, 'gt', '>') - le = _flex_comp_method(operator.le, 'le', '<=') - ge = _flex_comp_method(operator.ge, 'ge', '>=') - def dot(self, other): """ Matrix multiplication with DataFrame or Series objects @@ -5152,6 +4907,8 @@ def boxplot(self, column=None, by=None, ax=None, fontsize=None, return ax DataFrame.boxplot = boxplot +ops.add_flex_arithmetic_methods(DataFrame, **ops.frame_flex_funcs) +ops.add_special_arithmetic_methods(DataFrame, **ops.frame_special_funcs) if __name__ == '__main__': import nose diff --git a/pandas/core/ops.py b/pandas/core/ops.py new file mode 100644 index 0000000000000..4ce2143fdd92c --- /dev/null +++ b/pandas/core/ops.py @@ -0,0 +1,911 @@ +""" +Arithmetic operations for PandasObjects + +This is not a public API. +""" +import operator +import numpy as np +import pandas as pd +from pandas import compat, lib, tslib +import pandas.index as _index +from pandas.util.decorators import Appender +import pandas.core.common as com +import pandas.core.array as pa +import pandas.computation.expressions as expressions +from pandas.core.common import(bind_method, is_list_like, notnull, isnull, + _values_from_object, _maybe_match_name) + +# ----------------------------------------------------------------------------- +# Functions that add arithmetic methods to objects, given arithmetic factory +# methods + +def _create_methods(arith_method, radd_func, comp_method, bool_method, + use_numexpr, special=False, default_axis='columns'): + # NOTE: Only frame cares about default_axis, specifically: special methods + # have default axis None, whereas flex methods have default axis 'columns' + # if we're not using numexpr, then don't pass a str_rep + if use_numexpr: + op = lambda x: x + else: + op = lambda x: None + if special: + def names(x): + if x[-1] == "_": + return "__%s_" % x + else: + return "__%s__" % x + else: + names = lambda x: x + radd_func = radd_func or operator.add + # Inframe, all special methods have default_axis=None, flex methods have default_axis set to the default (columns) + new_methods = dict( + add=arith_method(operator.add, names('add'), op('+'), default_axis=default_axis), + radd=arith_method(radd_func, names('radd'), op('+'), default_axis=default_axis), + sub=arith_method(operator.sub, names('sub'), op('-'), default_axis=default_axis), + mul=arith_method(operator.mul, names('mul'), op('*'), default_axis=default_axis), + truediv=arith_method(operator.truediv, names('truediv'), op('/'), + truediv=True, fill_zeros=np.inf, default_axis=default_axis), + floordiv=arith_method(operator.floordiv, names('floordiv'), op('//'), + default_axis=default_axis, 
fill_zeros=np.inf), + # Causes a floating point exception in the tests when numexpr + # enabled, so for now no speedup + mod=arith_method(operator.mod, names('mod'), default_axis=default_axis, + fill_zeros=np.nan), + pow=arith_method(operator.pow, names('pow'), op('**'), default_axis=default_axis), + # not entirely sure why this is necessary, but previously was included + # so it's here to maintain compatibility + rmul=arith_method(operator.mul, names('rmul'), default_axis=default_axis), + rsub=arith_method(lambda x, y: y - x, names('rsub'), default_axis=default_axis), + rtruediv=arith_method(lambda x, y: operator.truediv(y, x), names('rtruediv'), + truediv=True, fill_zeros=np.inf, default_axis=default_axis), + rfloordiv=arith_method(lambda x, y: operator.floordiv(y, x), names('rfloordiv'), + default_axis=default_axis, fill_zeros=np.inf), + rpow=arith_method(lambda x, y: y ** x, names('rpow'), default_axis=default_axis), + rmod=arith_method(lambda x, y: y % x, names('rmod'), default_axis=default_axis), + ) + if not compat.PY3: + new_methods["div"] = arith_method(operator.div, names('div'), op('/'), + truediv=False, fill_zeros=np.inf, default_axis=default_axis) + new_methods["rdiv"] = arith_method(lambda x, y: operator.div(y, x), names('rdiv'), + truediv=False, fill_zeros=np.inf, default_axis=default_axis) + else: + new_methods["div"] = arith_method(operator.truediv, names('div'), op('/'), + truediv=True, fill_zeros=np.inf, default_axis=default_axis) + new_methods["rdiv"] = arith_method(lambda x, y: operator.truediv(y, x), names('rdiv'), + truediv=False, fill_zeros=np.inf, default_axis=default_axis) + # Comp methods never had a default axis set + if comp_method: + new_methods.update(dict( + eq=comp_method(operator.eq, names('eq'), op('==')), + ne=comp_method(operator.ne, names('ne'), op('!='), masker=True), + lt=comp_method(operator.lt, names('lt'), op('<')), + gt=comp_method(operator.gt, names('gt'), op('>')), + le=comp_method(operator.le, names('le'), op('<=')), + ge=comp_method(operator.ge, names('ge'), op('>=')), + )) + if bool_method: + new_methods.update(dict( + and_=bool_method(operator.and_, names('and_ [&]'), op('&')), + or_=bool_method(operator.or_, names('or_ [|]'), op('|')), + # For some reason ``^`` wasn't used in original. + xor=bool_method(operator.xor, names('xor [^]')), + rand_=bool_method(lambda x, y: operator.and_(y, x), names('rand_[&]')), + ror_=bool_method(lambda x, y: operator.or_(y, x), names('ror_ [|]')), + rxor=bool_method(lambda x, y: operator.xor(y, x), names('rxor [^]')) + )) + + new_methods = dict((names(k), v) for k, v in new_methods.items()) + return new_methods + + +def add_methods(cls, new_methods, force, select, exclude): + if select and exclude: + raise TypeError("May only pass either select or exclude") + methods = new_methods + if select: + select = set(select) + methods = {} + for key, method in new_methods.items(): + if key in select: + methods[key] = method + if exclude: + for k in exclude: + new_methods.pop(k, None) + + for name, method in new_methods.items(): + if force or name not in cls.__dict__: + bind_method(cls, name, method) + +#---------------------------------------------------------------------- +# Arithmetic +def add_special_arithmetic_methods(cls, arith_method=None, radd_func=None, + comp_method=None, bool_method=None, + use_numexpr=True, force=False, select=None, + exclude=None): + """ + Adds the full suite of special arithmetic methods (``__add__``, ``__sub__``, etc.) to the class. 
+ + Parameters + ---------- + arith_method : function (optional) + factory for special arithmetic methods, with op string: + f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) + radd_func : function (optional) + Possible replacement for ``operator.add`` for compatibility + comp_method : function, optional, + factory for rich comparison - signature: f(op, name, str_rep) + use_numexpr : bool, default True + whether to accelerate with numexpr, defaults to True + force : bool, default False + if False, checks whether function is defined **on ``cls.__dict__``** before defining + if True, always defines functions on class base + select : iterable of strings (optional) + if passed, only sets functions with names in select + exclude : iterable of strings (optional) + if passed, will not set functions with names in exclude + """ + radd_func = radd_func or operator.add + # in frame, special methods have default_axis = None, comp methods use 'columns' + new_methods = _create_methods(arith_method, radd_func, comp_method, bool_method, use_numexpr, default_axis=None, + special=True) + + # inplace operators (I feel like these should get passed an `inplace=True` + # or just be removed + new_methods.update(dict( + __iadd__=new_methods["__add__"], + __isub__=new_methods["__sub__"], + __imul__=new_methods["__mul__"], + __itruediv__=new_methods["__truediv__"], + __ipow__=new_methods["__pow__"] + )) + if not compat.PY3: + new_methods["__idiv__"] = new_methods["__div__"] + + add_methods(cls, new_methods=new_methods, force=force, select=select, exclude=exclude) + + +def add_flex_arithmetic_methods(cls, flex_arith_method, radd_func=None, + flex_comp_method=None, flex_bool_method=None, + use_numexpr=True, force=False, select=None, + exclude=None): + """ + Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. 
+ + Parameters + ---------- + flex_arith_method : function (optional) + factory for special arithmetic methods, with op string: + f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) + radd_func : function (optional) + Possible replacement for ``lambda x, y: operator.add(y, x)`` for compatibility + flex_comp_method : function, optional, + factory for rich comparison - signature: f(op, name, str_rep) + use_numexpr : bool, default True + whether to accelerate with numexpr, defaults to True + force : bool, default False + if False, checks whether function is defined **on ``cls.__dict__``** before defining + if True, always defines functions on class base + select : iterable of strings (optional) + if passed, only sets functions with names in select + exclude : iterable of strings (optional) + if passed, will not set functions with names in exclude + """ + radd_func = radd_func or (lambda x, y: operator.add(y, x)) + # in frame, default axis is 'columns', doesn't matter for series and panel + new_methods = _create_methods( + flex_arith_method, radd_func, flex_comp_method, flex_bool_method, + use_numexpr, default_axis='columns', special=False) + new_methods.update(dict( + multiply=new_methods['mul'], + subtract=new_methods['sub'], + divide=new_methods['div'] + )) + # opt out of bool flex methods for now + for k in ('ror_', 'rxor', 'rand_'): + if k in new_methods: + new_methods.pop(k) + + add_methods(cls, new_methods=new_methods, force=force, select=select, exclude=exclude) + +def cleanup_name(name): + """cleanup special names + >>> cleanup_name("__rsub__") + sub + >>> cleanup_name("rand_") + and_ + """ + if name[:2] == "__": + name = name[2:-2] + if name[0] == "r": + name = name[1:] + # readd last _ for operator names. + if name == "or": + name = "or_" + elif name == "and": + name = "and_" + return name + + +# direct copy of original Series _TimeOp +class _TimeOp(object): + """ + Wrapper around Series datetime/time/timedelta arithmetic operations. + Generally, you should use classmethod ``maybe_convert_for_time_op`` as an + entry point. 
+ """ + fill_value = tslib.iNaT + wrap_results = staticmethod(lambda x: x) + dtype = None + + def __init__(self, left, right, name): + self.name = name + + lvalues = self._convert_to_array(left, name=name) + rvalues = self._convert_to_array(right, name=name) + + self.is_timedelta_lhs = com.is_timedelta64_dtype(left) + self.is_datetime_lhs = com.is_datetime64_dtype(left) + self.is_integer_lhs = left.dtype.kind in ['i','u'] + self.is_datetime_rhs = com.is_datetime64_dtype(rvalues) + self.is_timedelta_rhs = (com.is_timedelta64_dtype(rvalues) + or (not self.is_datetime_rhs + and pd._np_version_under1p7)) + self.is_integer_rhs = rvalues.dtype.kind in ('i','u') + + self._validate() + + self._convert_for_datetime(lvalues, rvalues) + + def _validate(self): + # timedelta and integer mul/div + + if (self.is_timedelta_lhs and self.is_integer_rhs) or\ + (self.is_integer_lhs and self.is_timedelta_rhs): + + if self.name not in ('__truediv__','__div__','__mul__'): + raise TypeError("can only operate on a timedelta and an integer for " + "division, but the operator [%s] was passed" % self.name) + + # 2 datetimes + elif self.is_datetime_lhs and self.is_datetime_rhs: + if self.name != '__sub__': + raise TypeError("can only operate on a datetimes for subtraction, " + "but the operator [%s] was passed" % self.name) + + + # 2 timedeltas + elif self.is_timedelta_lhs and self.is_timedelta_rhs: + + if self.name not in ('__div__', '__truediv__', '__add__', '__sub__'): + raise TypeError("can only operate on a timedeltas for " + "addition, subtraction, and division, but the operator [%s] was passed" % self.name) + + # datetime and timedelta + elif self.is_datetime_lhs and self.is_timedelta_rhs: + + if self.name not in ('__add__','__sub__'): + raise TypeError("can only operate on a datetime with a rhs of a timedelta for " + "addition and subtraction, but the operator [%s] was passed" % self.name) + + elif self.is_timedelta_lhs and self.is_datetime_rhs: + + if self.name != '__add__': + raise TypeError("can only operate on a timedelta and a datetime for " + "addition, but the operator [%s] was passed" % self.name) + else: + raise TypeError('cannot operate on a series with out a rhs ' + 'of a series/ndarray of type datetime64[ns] ' + 'or a timedelta') + + def _convert_to_array(self, values, name=None): + """converts values to ndarray""" + from pandas.tseries.timedeltas import _possibly_cast_to_timedelta + + coerce = 'compat' if pd._np_version_under1p7 else True + if not is_list_like(values): + values = np.array([values]) + inferred_type = lib.infer_dtype(values) + if inferred_type in ('datetime64','datetime','date','time'): + # a datetlike + if not (isinstance(values, (pa.Array, pd.Series)) and com.is_datetime64_dtype(values)): + values = tslib.array_to_datetime(values) + elif isinstance(values, pd.DatetimeIndex): + values = values.to_series() + elif inferred_type in ('timedelta', 'timedelta64'): + # have a timedelta, convert to to ns here + values = _possibly_cast_to_timedelta(values, coerce=coerce) + elif inferred_type == 'integer': + # py3 compat where dtype is 'm' but is an integer + if values.dtype.kind == 'm': + values = values.astype('timedelta64[ns]') + elif isinstance(values, pd.PeriodIndex): + values = values.to_timestamp().to_series() + elif name not in ('__truediv__','__div__','__mul__'): + raise TypeError("incompatible type for a datetime/timedelta " + "operation [{0}]".format(name)) + elif isinstance(values[0], pd.DateOffset): + # handle DateOffsets + os = pa.array([ getattr(v,'delta',None) for v in values 
]) + mask = isnull(os) + if mask.any(): + raise TypeError("cannot use a non-absolute DateOffset in " + "datetime/timedelta operations [{0}]".format( + ','.join([ com.pprint_thing(v) for v in values[mask] ]))) + values = _possibly_cast_to_timedelta(os, coerce=coerce) + else: + raise TypeError("incompatible type [{0}] for a datetime/timedelta operation".format(pa.array(values).dtype)) + + return values + + def _convert_for_datetime(self, lvalues, rvalues): + mask = None + # datetimes require views + if self.is_datetime_lhs or self.is_datetime_rhs: + # datetime subtraction means timedelta + if self.is_datetime_lhs and self.is_datetime_rhs: + self.dtype = 'timedelta64[ns]' + else: + self.dtype = 'datetime64[ns]' + mask = isnull(lvalues) | isnull(rvalues) + lvalues = lvalues.view(np.int64) + rvalues = rvalues.view(np.int64) + + # otherwise it's a timedelta + else: + self.dtype = 'timedelta64[ns]' + mask = isnull(lvalues) | isnull(rvalues) + lvalues = lvalues.astype(np.int64) + rvalues = rvalues.astype(np.int64) + + # time delta division -> unit less + # integer gets converted to timedelta in np < 1.6 + if (self.is_timedelta_lhs and self.is_timedelta_rhs) and\ + not self.is_integer_rhs and\ + not self.is_integer_lhs and\ + self.name in ('__div__', '__truediv__'): + self.dtype = 'float64' + self.fill_value = np.nan + lvalues = lvalues.astype(np.float64) + rvalues = rvalues.astype(np.float64) + + # if we need to mask the results + if mask is not None: + if mask.any(): + def f(x): + x = pa.array(x,dtype=self.dtype) + np.putmask(x,mask,self.fill_value) + return x + self.wrap_results = f + self.lvalues = lvalues + self.rvalues = rvalues + + @classmethod + def maybe_convert_for_time_op(cls, left, right, name): + """ + if ``left`` and ``right`` are appropriate for datetime arithmetic with + operation ``name``, processes them and returns a ``_TimeOp`` object + that stores all the required values. Otherwise, it will generate + either a ``NotImplementedError`` or ``None``, indicating that the + operation is unsupported for datetimes (e.g., an unsupported r_op) or + that the data is not the right type for time ops. + """ + # decide if we can do it + is_timedelta_lhs = com.is_timedelta64_dtype(left) + is_datetime_lhs = com.is_datetime64_dtype(left) + if not (is_datetime_lhs or is_timedelta_lhs): + return None + # rops are allowed. No need for special checks, just strip off + # r part. + if name.startswith('__r'): + name = "__" + name[3:] + return cls(left, right, name) + + +def _arith_method_SERIES(op, name, str_rep=None, fill_zeros=None, default_axis=None, **eval_kwargs): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. 
+ """ + def na_op(x, y): + try: + result = expressions.evaluate(op, str_rep, x, y, + raise_on_error=True, **eval_kwargs) + except TypeError: + result = pa.empty(len(x), dtype=x.dtype) + if isinstance(y, (pa.Array, pd.Series)): + mask = notnull(x) & notnull(y) + result[mask] = op(x[mask], y[mask]) + else: + mask = notnull(x) + result[mask] = op(x[mask], y) + + result, changed = com._maybe_upcast_putmask(result, -mask, pa.NA) + + result = com._fill_zeros(result, y, fill_zeros) + return result + + def wrapper(left, right, name=name): + + time_converted = _TimeOp.maybe_convert_for_time_op(left, right, name) + + if time_converted is None: + lvalues, rvalues = left, right + dtype = None + wrap_results = lambda x: x + elif time_converted == NotImplemented: + return NotImplemented + else: + lvalues = time_converted.lvalues + rvalues = time_converted.rvalues + dtype = time_converted.dtype + wrap_results = time_converted.wrap_results + + if isinstance(rvalues, pd.Series): + join_idx, lidx, ridx = left.index.join(rvalues.index, how='outer', + return_indexers=True) + rindex = rvalues.index + name = _maybe_match_name(left, rvalues) + lvalues = getattr(lvalues, 'values', lvalues) + rvalues = getattr(rvalues, 'values', rvalues) + if left.index.equals(rindex): + index = left.index + else: + index = join_idx + + if lidx is not None: + lvalues = com.take_1d(lvalues, lidx) + + if ridx is not None: + rvalues = com.take_1d(rvalues, ridx) + + arr = na_op(lvalues, rvalues) + + return left._constructor(wrap_results(arr), index=index, + name=name, dtype=dtype) + elif isinstance(right, pd.DataFrame): + return NotImplemented + else: + # scalars + if hasattr(lvalues, 'values'): + lvalues = lvalues.values + return left._constructor(wrap_results(na_op(lvalues, rvalues)), + index=left.index, name=left.name, dtype=dtype) + return wrapper + +def _comp_method_SERIES(op, name, str_rep=None, masker=False): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. 
+ """ + def na_op(x, y): + if x.dtype == np.object_: + if isinstance(y, list): + y = lib.list_to_object_array(y) + + if isinstance(y, (pa.Array, pd.Series)): + if y.dtype != np.object_: + result = lib.vec_compare(x, y.astype(np.object_), op) + else: + result = lib.vec_compare(x, y, op) + else: + result = lib.scalar_compare(x, y, op) + else: + + try: + result = getattr(x,name)(y) + if result is NotImplemented: + raise TypeError("invalid type comparison") + except (AttributeError): + result = op(x, y) + + return result + + def wrapper(self, other): + if isinstance(other, pd.Series): + name = _maybe_match_name(self, other) + if len(self) != len(other): + raise ValueError('Series lengths must match to compare') + return self._constructor(na_op(self.values, other.values), + index=self.index, name=name) + elif isinstance(other, pd.DataFrame): # pragma: no cover + return NotImplemented + elif isinstance(other, (pa.Array, pd.Series)): + if len(self) != len(other): + raise ValueError('Lengths must match to compare') + return self._constructor(na_op(self.values, np.asarray(other)), + index=self.index, name=self.name) + else: + + mask = isnull(self) + + values = self.values + other = _index.convert_scalar(values, other) + + if issubclass(values.dtype.type, np.datetime64): + values = values.view('i8') + + # scalars + res = na_op(values, other) + if np.isscalar(res): + raise TypeError('Could not compare %s type with Series' + % type(other)) + + # always return a full value series here + res = _values_from_object(res) + + res = pd.Series(res, index=self.index, name=self.name, dtype='bool') + + # mask out the invalids + if mask.any(): + res[mask.values] = masker + + return res + return wrapper + + +def _bool_method_SERIES(op, name, str_rep=None): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. + """ + def na_op(x, y): + try: + result = op(x, y) + except TypeError: + if isinstance(y, list): + y = lib.list_to_object_array(y) + + if isinstance(y, (pa.Array, pd.Series)): + if (x.dtype == np.bool_ and + y.dtype == np.bool_): # pragma: no cover + result = op(x, y) # when would this be hit? + else: + x = com._ensure_object(x) + y = com._ensure_object(y) + result = lib.vec_binop(x, y, op) + else: + result = lib.scalar_binop(x, y, op) + + return result + + def wrapper(self, other): + if isinstance(other, pd.Series): + name = _maybe_match_name(self, other) + return self._constructor(na_op(self.values, other.values), + index=self.index, name=name) + elif isinstance(other, pd.DataFrame): + return NotImplemented + else: + # scalars + return self._constructor(na_op(self.values, other), + index=self.index, name=self.name) + return wrapper + + +# original Series _radd_compat method +def _radd_compat(left, right): + radd = lambda x, y: y + x + # GH #353, NumPy 1.5.1 workaround + try: + output = radd(left, right) + except TypeError: + cond = (pd._np_version_under1p6 and + left.dtype == np.object_) + if cond: # pragma: no cover + output = np.empty_like(left) + output.flat[:] = [radd(x, right) for x in left.flat] + else: + raise + + return output + + +def _flex_method_SERIES(op, name, str_rep=None, default_axis=None, + fill_zeros=None, **eval_kwargs): + doc = """ + Binary operator %s with support to substitute a fill_value for missing data + in one of the inputs + + Parameters + ---------- + other: Series or scalar value + fill_value : None or float value, default None (NaN) + Fill missing (NaN) values with this value. 
If both Series are + missing, the result will be missing + level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + + Returns + ------- + result : Series + """ % name + + @Appender(doc) + def f(self, other, level=None, fill_value=None): + if isinstance(other, pd.Series): + return self._binop(other, op, level=level, fill_value=fill_value) + elif isinstance(other, (pa.Array, pd.Series, list, tuple)): + if len(other) != len(self): + raise ValueError('Lengths must be equal') + return self._binop(self._constructor(other, self.index), op, + level=level, fill_value=fill_value) + else: + return self._constructor(op(self.values, other), self.index, + name=self.name) + + f.__name__ = name + return f + +series_flex_funcs = dict(flex_arith_method=_flex_method_SERIES, + radd_func=_radd_compat, + flex_comp_method=_comp_method_SERIES) + +series_special_funcs = dict(arith_method=_arith_method_SERIES, + radd_func=_radd_compat, + comp_method=_comp_method_SERIES, + bool_method=_bool_method_SERIES) + + +_arith_doc_FRAME = """ +Binary operator %s with support to substitute a fill_value for missing data in +one of the inputs + +Parameters +---------- +other : Series, DataFrame, or constant +axis : {0, 1, 'index', 'columns'} + For Series input, axis to match Series index on +fill_value : None or float value, default None + Fill missing (NaN) values with this value. If both DataFrame locations are + missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Notes +----- +Mismatched indices will be unioned together + +Returns +------- +result : DataFrame +""" + + +def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns', fill_zeros=None, **eval_kwargs): + def na_op(x, y): + try: + result = expressions.evaluate( + op, str_rep, x, y, raise_on_error=True, **eval_kwargs) + except TypeError: + xrav = x.ravel() + result = np.empty(x.size, dtype=x.dtype) + if isinstance(y, (np.ndarray, pd.Series)): + yrav = y.ravel() + mask = notnull(xrav) & notnull(yrav) + result[mask] = op(xrav[mask], yrav[mask]) + else: + mask = notnull(xrav) + result[mask] = op(xrav[mask], y) + + result, changed = com._maybe_upcast_putmask(result, -mask, np.nan) + result = result.reshape(x.shape) + + result = com._fill_zeros(result, y, fill_zeros) + + return result + + @Appender(_arith_doc_FRAME % name) + def f(self, other, axis=default_axis, level=None, fill_value=None): + if isinstance(other, pd.DataFrame): # Another DataFrame + return self._combine_frame(other, na_op, fill_value, level) + elif isinstance(other, pd.Series): + return self._combine_series(other, na_op, fill_value, axis, level) + elif isinstance(other, (list, tuple)): + if axis is not None and self._get_axis_name(axis) == 'index': + # casted = self._constructor_sliced(other, index=self.index) + casted = pd.Series(other, index=self.index) + else: + # casted = self._constructor_sliced(other, index=self.columns) + casted = pd.Series(other, index=self.columns) + return self._combine_series(casted, na_op, fill_value, axis, level) + elif isinstance(other, np.ndarray): + if other.ndim == 1: + if axis is not None and self._get_axis_name(axis) == 'index': + # casted = self._constructor_sliced(other, index=self.index) + casted = pd.Series(other, index=self.index) + else: + # casted = self._constructor_sliced(other, index=self.columns) + casted = pd.Series(other, index=self.columns) + return self._combine_series(casted, na_op, fill_value, + axis, 
level) + elif other.ndim == 2: + # casted = self._constructor(other, index=self.index, + # columns=self.columns) + casted = pd.DataFrame(other, index=self.index, + columns=self.columns) + return self._combine_frame(casted, na_op, fill_value, level) + else: + raise ValueError("Incompatible argument shape: %s" % + (other.shape,)) + else: + return self._combine_const(other, na_op) + + f.__name__ = name + + return f + + +# Masker unused for now +def _flex_comp_method_FRAME(op, name, str_rep=None, default_axis='columns', + masker=False): + + def na_op(x, y): + try: + result = op(x, y) + except TypeError: + xrav = x.ravel() + result = np.empty(x.size, dtype=x.dtype) + if isinstance(y, (np.ndarray, pd.Series)): + yrav = y.ravel() + mask = notnull(xrav) & notnull(yrav) + result[mask] = op(np.array(list(xrav[mask])), + np.array(list(yrav[mask]))) + else: + mask = notnull(xrav) + result[mask] = op(np.array(list(xrav[mask])), y) + + if op == operator.ne: # pragma: no cover + np.putmask(result, -mask, True) + else: + np.putmask(result, -mask, False) + result = result.reshape(x.shape) + + return result + + @Appender('Wrapper for flexible comparison methods %s' % name) + def f(self, other, axis=default_axis, level=None): + if isinstance(other, pd.DataFrame): # Another DataFrame + return self._flex_compare_frame(other, na_op, str_rep, level) + + elif isinstance(other, pd.Series): + return self._combine_series(other, na_op, None, axis, level) + + elif isinstance(other, (list, tuple)): + if axis is not None and self._get_axis_name(axis) == 'index': + casted = pd.Series(other, index=self.index) + else: + casted = pd.Series(other, index=self.columns) + + return self._combine_series(casted, na_op, None, axis, level) + + elif isinstance(other, np.ndarray): + if other.ndim == 1: + if axis is not None and self._get_axis_name(axis) == 'index': + casted = pd.Series(other, index=self.index) + else: + casted = pd.Series(other, index=self.columns) + + return self._combine_series(casted, na_op, None, axis, level) + + elif other.ndim == 2: + casted = pd.DataFrame(other, index=self.index, + columns=self.columns) + + return self._flex_compare_frame(casted, na_op, str_rep, level) + + else: + raise ValueError("Incompatible argument shape: %s" % + (other.shape,)) + + else: + return self._combine_const(other, na_op) + + f.__name__ = name + + return f + + +def _comp_method_FRAME(func, name, str_rep, masker=False): + @Appender('Wrapper for comparison method %s' % name) + def f(self, other): + if isinstance(other, pd.DataFrame): # Another DataFrame + return self._compare_frame(other, func, str_rep) + elif isinstance(other, pd.Series): + return self._combine_series_infer(other, func) + else: + + # straight boolean comparisions we want to allow all columns + # (regardless of dtype to pass thru) See #4537 for discussion. 
+ return self._combine_const(other, func, raise_on_error=False).fillna(True).astype(bool) + + f.__name__ = name + + return f + + +frame_flex_funcs = dict(flex_arith_method=_arith_method_FRAME, + radd_func=_radd_compat, + flex_comp_method=_flex_comp_method_FRAME) + + +frame_special_funcs = dict(arith_method=_arith_method_FRAME, + radd_func=_radd_compat, + comp_method=_comp_method_FRAME, + bool_method=_arith_method_FRAME) + + +def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None, + default_axis=None, **eval_kwargs): + # copied from Series na_op above, but without unnecessary branch for + # non-scalar + def na_op(x, y): + try: + result = expressions.evaluate(op, str_rep, x, y, + raise_on_error=True, **eval_kwargs) + except TypeError: + result = pa.empty(len(x), dtype=x.dtype) + mask = notnull(x) + result[mask] = op(x[mask], y) + result, changed = com._maybe_upcast_putmask(result, -mask, pa.NA) + + result = com._fill_zeros(result, y, fill_zeros) + return result + # work only for scalars + + def f(self, other): + if not np.isscalar(other): + raise ValueError('Simple arithmetic with %s can only be ' + 'done with scalar values' % self._constructor.__name__) + + return self._combine(other, op) + f.__name__ = name + return f + + +def _comp_method_PANEL(op, name, str_rep=None, masker=False): + + def na_op(x, y): + try: + result = expressions.evaluate(op, str_rep, x, y, + raise_on_error=True) + except TypeError: + xrav = x.ravel() + result = np.empty(x.size, dtype=bool) + if isinstance(y, np.ndarray): + yrav = y.ravel() + mask = notnull(xrav) & notnull(yrav) + result[mask] = op(np.array(list(xrav[mask])), + np.array(list(yrav[mask]))) + else: + mask = notnull(xrav) + result[mask] = op(np.array(list(xrav[mask])), y) + + if op == operator.ne: # pragma: no cover + np.putmask(result, -mask, True) + else: + np.putmask(result, -mask, False) + result = result.reshape(x.shape) + + return result + + @Appender('Wrapper for comparison method %s' % name) + def f(self, other): + if isinstance(other, self._constructor): + return self._compare_constructor(other, na_op) + elif isinstance(other, (self._constructor_sliced, pd.DataFrame, + pd.Series)): + raise Exception("input needs alignment for this object [%s]" % + self._constructor) + else: + return self._combine_const(other, na_op) + + f.__name__ = name + + return f + + +panel_special_funcs = dict(arith_method=_arith_method_PANEL, + comp_method=_comp_method_PANEL, + bool_method=_arith_method_PANEL) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 697344639c41b..7208ceff7d1a7 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -5,7 +5,6 @@ from pandas.compat import map, zip, range, lrange, lmap, u, OrderedDict, OrderedDefaultdict from pandas import compat -import operator import sys import numpy as np from pandas.core.common import (PandasError, @@ -18,14 +17,14 @@ from pandas.core.internals import (BlockManager, create_block_manager_from_arrays, create_block_manager_from_blocks) -from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas import compat from pandas.util.decorators import deprecate, Appender, Substitution import pandas.core.common as com +import pandas.core.ops as ops import pandas.core.nanops as nanops -import pandas.lib as lib +import pandas.computation.expressions as expressions def _ensure_like_indices(time, panels): @@ -91,57 +90,6 @@ def panel_index(time, panels, names=['time', 'panel']): return MultiIndex(levels, labels, sortorder=None, 
names=names) -def _arith_method(func, name): - # work only for scalars - - def f(self, other): - if not np.isscalar(other): - raise ValueError('Simple arithmetic with %s can only be ' - 'done with scalar values' % self._constructor.__name__) - - return self._combine(other, func) - f.__name__ = name - return f - - -def _comp_method(func, name): - - def na_op(x, y): - try: - result = func(x, y) - except TypeError: - xrav = x.ravel() - result = np.empty(x.size, dtype=x.dtype) - if isinstance(y, np.ndarray): - yrav = y.ravel() - mask = notnull(xrav) & notnull(yrav) - result[mask] = func(np.array(list(xrav[mask])), - np.array(list(yrav[mask]))) - else: - mask = notnull(xrav) - result[mask] = func(np.array(list(xrav[mask])), y) - - if func == operator.ne: # pragma: no cover - np.putmask(result, -mask, True) - else: - np.putmask(result, -mask, False) - result = result.reshape(x.shape) - - return result - - @Appender('Wrapper for comparison method %s' % name) - def f(self, other): - if isinstance(other, self._constructor): - return self._compare_constructor(other, func) - elif isinstance(other, (self._constructor_sliced, DataFrame, Series)): - raise Exception("input needs alignment for this object [%s]" % - self._constructor) - else: - return self._combine_const(other, na_op) - - f.__name__ = name - - return f class Panel(NDFrame): @@ -289,25 +237,6 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): d[cls._info_axis_name] = Index(ks) return cls(**d) - # Comparison methods - __add__ = _arith_method(operator.add, '__add__') - __sub__ = _arith_method(operator.sub, '__sub__') - __truediv__ = _arith_method(operator.truediv, '__truediv__') - __floordiv__ = _arith_method(operator.floordiv, '__floordiv__') - __mul__ = _arith_method(operator.mul, '__mul__') - __pow__ = _arith_method(operator.pow, '__pow__') - - __radd__ = _arith_method(operator.add, '__radd__') - __rmul__ = _arith_method(operator.mul, '__rmul__') - __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__') - __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__') - __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__') - __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__') - - if not compat.PY3: - __div__ = _arith_method(operator.div, '__div__') - __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__') - def __getitem__(self, key): if isinstance(self._info_axis, MultiIndex): return self._getitem_multilevel(key) @@ -365,26 +294,6 @@ def _compare_constructor(self, other, func): d = self._construct_axes_dict(copy=False) return self._constructor(data=new_data, **d) - # boolean operators - __and__ = _arith_method(operator.and_, '__and__') - __or__ = _arith_method(operator.or_, '__or__') - __xor__ = _arith_method(operator.xor, '__xor__') - - # Comparison methods - __eq__ = _comp_method(operator.eq, '__eq__') - __ne__ = _comp_method(operator.ne, '__ne__') - __lt__ = _comp_method(operator.lt, '__lt__') - __gt__ = _comp_method(operator.gt, '__gt__') - __le__ = _comp_method(operator.le, '__le__') - __ge__ = _comp_method(operator.ge, '__ge__') - - eq = _comp_method(operator.eq, 'eq') - ne = _comp_method(operator.ne, 'ne') - gt = _comp_method(operator.gt, 'gt') - lt = _comp_method(operator.lt, 'lt') - ge = _comp_method(operator.ge, 'ge') - le = _comp_method(operator.le, 'le') - #---------------------------------------------------------------------- # Magic methods @@ -1262,7 +1171,7 @@ def _extract_axis(self, data, axis=0, intersect=False): return _ensure_index(index) @classmethod - def 
_add_aggregate_operations(cls): + def _add_aggregate_operations(cls, use_numexpr=True): """ add the operations to the cls; evaluate the doc strings again """ # doc strings substitors @@ -1279,25 +1188,29 @@ def _add_aggregate_operations(cls): ------- """ + cls.__name__ + "\n" - def _panel_arith_method(op, name): + def _panel_arith_method(op, name, str_rep = None, default_axis=None, + fill_zeros=None, **eval_kwargs): + def na_op(x, y): + try: + result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) + except TypeError: + result = op(x, y) + + # handles discrepancy between numpy and numexpr on division/mod by 0 + # though, given that these are generally (always?) non-scalars, I'm + # not sure whether it's worth it at the moment + result = com._fill_zeros(result,y,fill_zeros) + return result @Substitution(op) @Appender(_agg_doc) def f(self, other, axis=0): - return self._combine(other, op, axis=axis) + return self._combine(other, na_op, axis=axis) f.__name__ = name return f - - cls.add = _panel_arith_method(operator.add, 'add') - cls.subtract = cls.sub = _panel_arith_method(operator.sub, 'subtract') - cls.multiply = cls.mul = _panel_arith_method(operator.mul, 'multiply') - - try: - cls.divide = cls.div = _panel_arith_method(operator.div, 'divide') - except AttributeError: # pragma: no cover - # Python 3 - cls.divide = cls.div = _panel_arith_method( - operator.truediv, 'divide') - + # add `div`, `mul`, `pow`, etc.. + ops.add_flex_arithmetic_methods(cls, _panel_arith_method, + use_numexpr=use_numexpr, + flex_comp_method=ops._comp_method_PANEL) _agg_doc = """ Return %(desc)s over requested axis @@ -1385,6 +1298,8 @@ def min(self, axis='major', skipna=True): 'minor': 'minor_axis'}, slicers={'major_axis': 'index', 'minor_axis': 'columns'}) + +ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) Panel._add_aggregate_operations() WidePanel = Panel diff --git a/pandas/core/series.py b/pandas/core/series.py index aeb63ecbe268f..38e22e7a9ed3a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6,7 +6,6 @@ # pylint: disable=W0703,W0622,W0613,W0201 import operator -from distutils.version import LooseVersion import types from numpy import nan, ndarray @@ -21,7 +20,7 @@ _values_from_object, _possibly_cast_to_datetime, _possibly_castable, _possibly_convert_platform, - ABCSparseArray) + ABCSparseArray, _maybe_match_name) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, _ensure_index, _handle_legacy_indexes) from pandas.core.indexing import ( @@ -32,13 +31,12 @@ from pandas.core.categorical import Categorical from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex, Period -from pandas.tseries.offsets import DateOffset -from pandas.tseries.timedeltas import _possibly_cast_to_timedelta from pandas import compat from pandas.util.terminal import get_terminal_size from pandas.compat import zip, lzip, u, OrderedDict import pandas.core.array as pa +import pandas.core.ops as ops import pandas.core.common as com import pandas.core.datetools as datetools @@ -55,387 +53,6 @@ __all__ = ['Series'] -_np_version = np.version.short_version -_np_version_under1p6 = LooseVersion(_np_version) < '1.6' -_np_version_under1p7 = LooseVersion(_np_version) < '1.7' - -class _TimeOp(object): - """ - Wrapper around Series datetime/time/timedelta arithmetic operations. - Generally, you should use classmethod ``maybe_convert_for_time_op`` as an - entry point. 
- """ - fill_value = tslib.iNaT - wrap_results = staticmethod(lambda x: x) - dtype = None - - def __init__(self, left, right, name): - self.name = name - - lvalues = self._convert_to_array(left, name=name) - rvalues = self._convert_to_array(right, name=name) - - self.is_timedelta_lhs = com.is_timedelta64_dtype(left) - self.is_datetime_lhs = com.is_datetime64_dtype(left) - self.is_integer_lhs = left.dtype.kind in ['i','u'] - self.is_datetime_rhs = com.is_datetime64_dtype(rvalues) - self.is_timedelta_rhs = com.is_timedelta64_dtype(rvalues) or (not self.is_datetime_rhs and _np_version_under1p7) - self.is_integer_rhs = rvalues.dtype.kind in ('i','u') - - self._validate() - - self._convert_for_datetime(lvalues, rvalues) - - def _validate(self): - # timedelta and integer mul/div - - if (self.is_timedelta_lhs and self.is_integer_rhs) or\ - (self.is_integer_lhs and self.is_timedelta_rhs): - - if self.name not in ('__truediv__','__div__','__mul__'): - raise TypeError("can only operate on a timedelta and an integer for " - "division, but the operator [%s] was passed" % self.name) - - # 2 datetimes - elif self.is_datetime_lhs and self.is_datetime_rhs: - if self.name != '__sub__': - raise TypeError("can only operate on a datetimes for subtraction, " - "but the operator [%s] was passed" % self.name) - - - # 2 timedeltas - elif self.is_timedelta_lhs and self.is_timedelta_rhs: - - if self.name not in ('__div__', '__truediv__', '__add__', '__sub__'): - raise TypeError("can only operate on a timedeltas for " - "addition, subtraction, and division, but the operator [%s] was passed" % self.name) - - # datetime and timedelta - elif self.is_datetime_lhs and self.is_timedelta_rhs: - - if self.name not in ('__add__','__sub__'): - raise TypeError("can only operate on a datetime with a rhs of a timedelta for " - "addition and subtraction, but the operator [%s] was passed" % self.name) - - elif self.is_timedelta_lhs and self.is_datetime_rhs: - - if self.name != '__add__': - raise TypeError("can only operate on a timedelta and a datetime for " - "addition, but the operator [%s] was passed" % self.name) - else: - raise TypeError('cannot operate on a series with out a rhs ' - 'of a series/ndarray of type datetime64[ns] ' - 'or a timedelta') - - def _convert_to_array(self, values, name=None): - """converts values to ndarray""" - coerce = 'compat' if _np_version_under1p7 else True - if not is_list_like(values): - values = np.array([values]) - inferred_type = lib.infer_dtype(values) - if inferred_type in ('datetime64','datetime','date','time'): - # a datetlike - if not (isinstance(values, (pa.Array, Series)) and com.is_datetime64_dtype(values)): - values = tslib.array_to_datetime(values) - elif isinstance(values, DatetimeIndex): - values = values.to_series() - elif inferred_type in ('timedelta', 'timedelta64'): - # have a timedelta, convert to to ns here - values = _possibly_cast_to_timedelta(values, coerce=coerce) - elif inferred_type == 'integer': - # py3 compat where dtype is 'm' but is an integer - if values.dtype.kind == 'm': - values = values.astype('timedelta64[ns]') - elif isinstance(values, PeriodIndex): - values = values.to_timestamp().to_series() - elif name not in ('__truediv__','__div__','__mul__'): - raise TypeError("incompatible type for a datetime/timedelta " - "operation [{0}]".format(name)) - elif isinstance(values[0],DateOffset): - # handle DateOffsets - os = pa.array([ getattr(v,'delta',None) for v in values ]) - mask = isnull(os) - if mask.any(): - raise TypeError("cannot use a non-absolute 
DateOffset in " - "datetime/timedelta operations [{0}]".format( - ','.join([ com.pprint_thing(v) for v in values[mask] ]))) - values = _possibly_cast_to_timedelta(os, coerce=coerce) - else: - raise TypeError("incompatible type [{0}] for a datetime/timedelta operation".format(pa.array(values).dtype)) - - return values - - def _convert_for_datetime(self, lvalues, rvalues): - mask = None - # datetimes require views - if self.is_datetime_lhs or self.is_datetime_rhs: - # datetime subtraction means timedelta - if self.is_datetime_lhs and self.is_datetime_rhs: - self.dtype = 'timedelta64[ns]' - else: - self.dtype = 'datetime64[ns]' - mask = isnull(lvalues) | isnull(rvalues) - lvalues = lvalues.view(np.int64) - rvalues = rvalues.view(np.int64) - - # otherwise it's a timedelta - else: - self.dtype = 'timedelta64[ns]' - mask = isnull(lvalues) | isnull(rvalues) - lvalues = lvalues.astype(np.int64) - rvalues = rvalues.astype(np.int64) - - # time delta division -> unit less - # integer gets converted to timedelta in np < 1.6 - if (self.is_timedelta_lhs and self.is_timedelta_rhs) and\ - not self.is_integer_rhs and\ - not self.is_integer_lhs and\ - self.name in ('__div__', '__truediv__'): - self.dtype = 'float64' - self.fill_value = np.nan - lvalues = lvalues.astype(np.float64) - rvalues = rvalues.astype(np.float64) - - # if we need to mask the results - if mask is not None: - if mask.any(): - def f(x): - x = pa.array(x,dtype=self.dtype) - np.putmask(x,mask,self.fill_value) - return x - self.wrap_results = f - self.lvalues = lvalues - self.rvalues = rvalues - - @classmethod - def maybe_convert_for_time_op(cls, left, right, name): - """ - if ``left`` and ``right`` are appropriate for datetime arithmetic with - operation ``name``, processes them and returns a ``_TimeOp`` object - that stores all the required values. Otherwise, it will generate - either a ``NotImplementedError`` or ``None``, indicating that the - operation is unsupported for datetimes (e.g., an unsupported r_op) or - that the data is not the right type for time ops. - """ - # decide if we can do it - is_timedelta_lhs = com.is_timedelta64_dtype(left) - is_datetime_lhs = com.is_datetime64_dtype(left) - if not (is_datetime_lhs or is_timedelta_lhs): - return None - # rops currently disabled - if name.startswith('__r'): - return NotImplemented - - return cls(left, right, name) - -#---------------------------------------------------------------------- -# Wrapper function for Series arithmetic methods - -def _arith_method(op, name, fill_zeros=None): - """ - Wrapper function for Series arithmetic operations, to avoid - code duplication. 
- """ - def na_op(x, y): - try: - - result = op(x, y) - result = com._fill_zeros(result, y, fill_zeros) - - except TypeError: - result = pa.empty(len(x), dtype=x.dtype) - if isinstance(y, (pa.Array, Series)): - mask = notnull(x) & notnull(y) - result[mask] = op(x[mask], y[mask]) - else: - mask = notnull(x) - result[mask] = op(x[mask], y) - - result, changed = com._maybe_upcast_putmask(result, -mask, pa.NA) - - return result - - def wrapper(left, right, name=name): - from pandas.core.frame import DataFrame - - time_converted = _TimeOp.maybe_convert_for_time_op(left, right, name) - - if time_converted is None: - lvalues, rvalues = left, right - dtype = None - wrap_results = lambda x: x - elif time_converted == NotImplemented: - return NotImplemented - else: - lvalues = time_converted.lvalues - rvalues = time_converted.rvalues - dtype = time_converted.dtype - wrap_results = time_converted.wrap_results - - if isinstance(rvalues, Series): - - join_idx, lidx, ridx = left.index.join(rvalues.index, how='outer', - return_indexers=True) - rindex = rvalues.index - name = _maybe_match_name(left, rvalues) - lvalues = getattr(lvalues, 'values', lvalues) - rvalues = getattr(rvalues, 'values', rvalues) - if left.index.equals(rindex): - index = left.index - else: - index = join_idx - - if lidx is not None: - lvalues = com.take_1d(lvalues, lidx) - - if ridx is not None: - rvalues = com.take_1d(rvalues, ridx) - - arr = na_op(lvalues, rvalues) - - return left._constructor(wrap_results(arr), index=index, - name=name, dtype=dtype) - elif isinstance(right, DataFrame): - return NotImplemented - else: - # scalars - if hasattr(lvalues, 'values'): - lvalues = lvalues.values - return left._constructor(wrap_results(na_op(lvalues, rvalues)), - index=left.index, name=left.name, dtype=dtype) - return wrapper - - -def _comp_method(op, name, masker=False): - """ - Wrapper function for Series arithmetic operations, to avoid - code duplication. 
- """ - def na_op(x, y): - if x.dtype == np.object_: - if isinstance(y, list): - y = lib.list_to_object_array(y) - - if isinstance(y, (pa.Array, Series)): - if y.dtype != np.object_: - result = lib.vec_compare(x, y.astype(np.object_), op) - else: - result = lib.vec_compare(x, y, op) - else: - result = lib.scalar_compare(x, y, op) - else: - - try: - result = getattr(x,name)(y) - if result is NotImplemented: - raise TypeError("invalid type comparison") - except (AttributeError): - result = op(x, y) - - return result - - def wrapper(self, other): - from pandas.core.frame import DataFrame - - if isinstance(other, Series): - name = _maybe_match_name(self, other) - if len(self) != len(other): - raise ValueError('Series lengths must match to compare') - return self._constructor(na_op(self.values, other.values), - index=self.index, name=name) - elif isinstance(other, DataFrame): # pragma: no cover - return NotImplemented - elif isinstance(other, (pa.Array, Series)): - if len(self) != len(other): - raise ValueError('Lengths must match to compare') - return self._constructor(na_op(self.values, np.asarray(other)), - index=self.index, name=self.name) - else: - - mask = isnull(self) - - values = self.values - other = _index.convert_scalar(values, other) - - if issubclass(values.dtype.type, np.datetime64): - values = values.view('i8') - - # scalars - res = na_op(values, other) - if np.isscalar(res): - raise TypeError('Could not compare %s type with Series' - % type(other)) - - # always return a full value series here - res = _values_from_object(res) - - res = Series(res, index=self.index, name=self.name, dtype='bool') - - # mask out the invalids - if mask.any(): - res[mask.values] = masker - - return res - return wrapper - - -def _bool_method(op, name): - """ - Wrapper function for Series arithmetic operations, to avoid - code duplication. - """ - def na_op(x, y): - try: - result = op(x, y) - except TypeError: - if isinstance(y, list): - y = lib.list_to_object_array(y) - - if isinstance(y, (pa.Array, Series)): - if (x.dtype == np.bool_ and - y.dtype == np.bool_): # pragma: no cover - result = op(x, y) # when would this be hit? 
- else: - x = com._ensure_object(x) - y = com._ensure_object(y) - result = lib.vec_binop(x, y, op) - else: - result = lib.scalar_binop(x, y, op) - - return result - - def wrapper(self, other): - from pandas.core.frame import DataFrame - - if isinstance(other, Series): - name = _maybe_match_name(self, other) - return self._constructor(na_op(self.values, other.values), - index=self.index, name=name) - elif isinstance(other, DataFrame): - return NotImplemented - else: - # scalars - return self._constructor(na_op(self.values, other), - index=self.index, name=self.name) - return wrapper - - -def _radd_compat(left, right): - radd = lambda x, y: y + x - # GH #353, NumPy 1.5.1 workaround - try: - output = radd(left, right) - except TypeError: - cond = (_np_version_under1p6 and - left.dtype == np.object_) - if cond: # pragma: no cover - output = np.empty_like(left) - output.flat[:] = [radd(x, right) for x in left.flat] - else: - raise - - return output - def _coerce_method(converter): """ install the scalar coercion methods """ @@ -448,50 +65,6 @@ def wrapper(self): return wrapper -def _maybe_match_name(a, b): - name = None - if a.name == b.name: - name = a.name - return name - - -def _flex_method(op, name): - doc = """ - Binary operator %s with support to substitute a fill_value for missing data - in one of the inputs - - Parameters - ---------- - other: Series or scalar value - fill_value : None or float value, default None (NaN) - Fill missing (NaN) values with this value. If both Series are - missing, the result will be missing - level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - - Returns - ------- - result : Series - """ % name - - @Appender(doc) - def f(self, other, level=None, fill_value=None): - if isinstance(other, Series): - return self._binop(other, op, level=level, fill_value=fill_value) - elif isinstance(other, (pa.Array, Series, list, tuple)): - if len(other) != len(self): - raise ValueError('Lengths must be equal') - return self._binop(self._constructor(other, self.index), op, - level=level, fill_value=fill_value) - else: - return self._constructor(op(self.values, other), self.index, - name=self.name) - - f.__name__ = name - return f - - def _unbox(func): @Appender(func.__doc__) def f(self, *args, **kwargs): @@ -1423,37 +996,6 @@ def iteritems(self): if compat.PY3: # pragma: no cover items = iteritems - #---------------------------------------------------------------------- - # Arithmetic operators - - __add__ = _arith_method(operator.add, '__add__') - __sub__ = _arith_method(operator.sub, '__sub__') - __mul__ = _arith_method(operator.mul, '__mul__') - __truediv__ = _arith_method( - operator.truediv, '__truediv__', fill_zeros=np.inf) - __floordiv__ = _arith_method( - operator.floordiv, '__floordiv__', fill_zeros=np.inf) - __pow__ = _arith_method(operator.pow, '__pow__') - __mod__ = _arith_method(operator.mod, '__mod__', fill_zeros=np.nan) - - __radd__ = _arith_method(_radd_compat, '__add__') - __rmul__ = _arith_method(operator.mul, '__mul__') - __rsub__ = _arith_method(lambda x, y: y - x, '__sub__') - __rtruediv__ = _arith_method( - lambda x, y: y / x, '__truediv__', fill_zeros=np.inf) - __rfloordiv__ = _arith_method( - lambda x, y: y // x, '__floordiv__', fill_zeros=np.inf) - __rpow__ = _arith_method(lambda x, y: y ** x, '__pow__') - __rmod__ = _arith_method(lambda x, y: y % x, '__mod__', fill_zeros=np.nan) - - # comparisons - __gt__ = _comp_method(operator.gt, '__gt__') - __ge__ = _comp_method(operator.ge, '__ge__') - __lt__ = 
_comp_method(operator.lt, '__lt__') - __le__ = _comp_method(operator.le, '__le__') - __eq__ = _comp_method(operator.eq, '__eq__') - __ne__ = _comp_method(operator.ne, '__ne__', True) - # inversion def __neg__(self): arr = operator.neg(self.values) @@ -1463,26 +1005,6 @@ def __invert__(self): arr = operator.inv(self.values) return self._constructor(arr, self.index, name=self.name) - # binary logic - __or__ = _bool_method(operator.or_, '__or__') - __and__ = _bool_method(operator.and_, '__and__') - __xor__ = _bool_method(operator.xor, '__xor__') - - # Inplace operators - __iadd__ = __add__ - __isub__ = __sub__ - __imul__ = __mul__ - __itruediv__ = __truediv__ - __ifloordiv__ = __floordiv__ - __ipow__ = __pow__ - - # Python 2 division operators - if not compat.PY3: - __div__ = _arith_method(operator.div, '__div__', fill_zeros=np.inf) - __rdiv__ = _arith_method( - lambda x, y: y / x, '__div__', fill_zeros=np.inf) - __idiv__ = __div__ - #---------------------------------------------------------------------- # unbox reductions @@ -2245,16 +1767,6 @@ def _binop(self, other, func, level=None, fill_value=None): name = _maybe_match_name(self, other) return self._constructor(result, index=new_index, name=name) - add = _flex_method(operator.add, 'add') - sub = _flex_method(operator.sub, 'subtract') - mul = _flex_method(operator.mul, 'multiply') - try: - div = _flex_method(operator.div, 'divide') - except AttributeError: # pragma: no cover - # Python 3 - div = _flex_method(operator.truediv, 'divide') - mod = _flex_method(operator.mod, 'mod') - def combine(self, other, func, fill_value=nan): """ Perform elementwise binary operation on two Series using given function @@ -3281,3 +2793,7 @@ def _try_cast(arr, take_fast_path): Series.plot = _gfx.plot_series Series.hist = _gfx.hist_series + +# Add arithmetic! +ops.add_flex_arithmetic_methods(Series, **ops.series_flex_funcs) +ops.add_special_arithmetic_methods(Series, **ops.series_special_funcs) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 8a50a000a9526..bed4ede6ce5f3 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -7,7 +7,6 @@ from numpy import nan, ndarray import numpy as np -import operator from pandas.core.base import PandasObject import pandas.core.common as com @@ -17,21 +16,26 @@ from pandas._sparse import BlockIndex, IntIndex import pandas._sparse as splib import pandas.index as _index +import pandas.core.ops as ops -def _sparse_op_wrap(op, name): +def _arith_method(op, name, str_rep=None, default_axis=None, + fill_zeros=None, **eval_kwargs): """ Wrapper function for Series arithmetic operations, to avoid code duplication. """ - def wrapper(self, other): if isinstance(other, np.ndarray): if len(self) != len(other): - raise AssertionError("Operands must be of the same size") - if not isinstance(other, SparseArray): + raise AssertionError("length mismatch: %d vs. 
%d" % + (len(self), len(other))) + if not isinstance(other, com.ABCSparseArray): other = SparseArray(other, fill_value=self.fill_value) - return _sparse_array_op(self, other, op, name) + if name[0] == 'r': + return _sparse_array_op(other, self, op, name[1:]) + else: + return _sparse_array_op(self, other, op, name) elif np.isscalar(other): new_fill_value = op(np.float64(self.fill_value), np.float64(other)) @@ -41,7 +45,8 @@ def wrapper(self, other): fill_value=new_fill_value) else: # pragma: no cover raise TypeError('operation with %s not supported' % type(other)) - + if name.startswith("__"): + name = name[2:-2] wrapper.__name__ = name return wrapper @@ -218,23 +223,6 @@ def __unicode__(self): com.pprint_thing(self.fill_value), com.pprint_thing(self.sp_index)) - # Arithmetic operators - - __add__ = _sparse_op_wrap(operator.add, 'add') - __sub__ = _sparse_op_wrap(operator.sub, 'sub') - __mul__ = _sparse_op_wrap(operator.mul, 'mul') - __truediv__ = _sparse_op_wrap(operator.truediv, 'truediv') - __floordiv__ = _sparse_op_wrap(operator.floordiv, 'floordiv') - __pow__ = _sparse_op_wrap(operator.pow, 'pow') - - # reverse operators - __radd__ = _sparse_op_wrap(operator.add, 'add') - __rsub__ = _sparse_op_wrap(lambda x, y: y - x, 'rsub') - __rmul__ = _sparse_op_wrap(operator.mul, 'mul') - __rtruediv__ = _sparse_op_wrap(lambda x, y: y / x, 'rtruediv') - __rfloordiv__ = _sparse_op_wrap(lambda x, y: y // x, 'rfloordiv') - __rpow__ = _sparse_op_wrap(lambda x, y: y ** x, 'rpow') - def disable(self, other): raise NotImplementedError('inplace binary ops not supported') # Inplace operators @@ -247,8 +235,6 @@ def disable(self, other): # Python 2 division operators if not compat.PY3: - __div__ = _sparse_op_wrap(operator.div, 'div') - __rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__') __idiv__ = disable @property @@ -539,3 +525,7 @@ def make_sparse(arr, kind='block', fill_value=nan): sparsified_values = arr[mask] return sparsified_values, index + +ops.add_special_arithmetic_methods(SparseArray, + arith_method=_arith_method, + use_numexpr=False) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 93b29cbf91b91..6f83ee90dd9da 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -25,6 +25,7 @@ from pandas.core.generic import NDFrame from pandas.sparse.series import SparseSeries, SparseArray from pandas.util.decorators import Appender +import pandas.core.ops as ops class SparseDataFrame(DataFrame): @@ -815,3 +816,9 @@ def homogenize(series_dict): output = series_dict return output + +# use unaccelerated ops for sparse objects +ops.add_flex_arithmetic_methods(SparseDataFrame, use_numexpr=False, + **ops.frame_flex_funcs) +ops.add_special_arithmetic_methods(SparseDataFrame, use_numexpr=False, + **ops.frame_special_funcs) diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index 286b683b1ea88..dd0204f11edfb 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -16,6 +16,7 @@ from pandas.util.decorators import deprecate import pandas.core.common as com +import pandas.core.ops as ops class SparsePanelAxis(object): @@ -462,6 +463,19 @@ def minor_xs(self, key): default_fill_value=self.default_fill_value, default_kind=self.default_kind) + # TODO: allow SparsePanel to work with flex arithmetic. 
+ # pow and mod only work for scalars for now + def pow(self, val, *args, **kwargs): + """wrapper around `__pow__` (only works for scalar values)""" + return self.__pow__(val) + + def mod(self, val, *args, **kwargs): + """wrapper around `__mod__` (only works for scalar values""" + return self.__mod__(val) + +# Sparse objects opt out of numexpr +SparsePanel._add_aggregate_operations(use_numexpr=False) +ops.add_special_arithmetic_methods(SparsePanel, use_numexpr=False, **ops.panel_special_funcs) SparseWidePanel = SparsePanel diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 50e80e0c202d5..eb97eec75be36 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -10,13 +10,14 @@ import operator -from pandas.core.common import isnull, _values_from_object +from pandas.core.common import isnull, _values_from_object, _maybe_match_name from pandas.core.index import Index, _ensure_index -from pandas.core.series import Series, _maybe_match_name +from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.internals import SingleBlockManager from pandas.core import generic import pandas.core.common as com +import pandas.core.ops as ops import pandas.core.datetools as datetools import pandas.index as _index @@ -32,10 +33,14 @@ # Wrapper function for Series arithmetic methods -def _sparse_op_wrap(op, name): +def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, + **eval_kwargs): """ Wrapper function for Series arithmetic operations, to avoid code duplication. + + str_rep, default_axis, fill_zeros and eval_kwargs are not used, but are present + for compatibility. """ def wrapper(self, other): @@ -61,6 +66,10 @@ def wrapper(self, other): raise TypeError('operation with %s not supported' % type(other)) wrapper.__name__ = name + if name.startswith("__"): + # strip special method names, e.g. 
`__add__` needs to be `add` when passed + # to _sparse_series_op + name = name[2:-2] return wrapper @@ -272,36 +281,6 @@ def __unicode__(self): rep = '%s\n%s' % (series_rep, repr(self.sp_index)) return rep - # Arithmetic operators - - __add__ = _sparse_op_wrap(operator.add, 'add') - __sub__ = _sparse_op_wrap(operator.sub, 'sub') - __mul__ = _sparse_op_wrap(operator.mul, 'mul') - __truediv__ = _sparse_op_wrap(operator.truediv, 'truediv') - __floordiv__ = _sparse_op_wrap(operator.floordiv, 'floordiv') - __pow__ = _sparse_op_wrap(operator.pow, 'pow') - - # Inplace operators - __iadd__ = __add__ - __isub__ = __sub__ - __imul__ = __mul__ - __itruediv__ = __truediv__ - __ifloordiv__ = __floordiv__ - __ipow__ = __pow__ - - # reverse operators - __radd__ = _sparse_op_wrap(operator.add, '__radd__') - __rsub__ = _sparse_op_wrap(lambda x, y: y - x, '__rsub__') - __rmul__ = _sparse_op_wrap(operator.mul, '__rmul__') - __rtruediv__ = _sparse_op_wrap(lambda x, y: y / x, '__rtruediv__') - __rfloordiv__ = _sparse_op_wrap(lambda x, y: y // x, 'floordiv') - __rpow__ = _sparse_op_wrap(lambda x, y: y ** x, '__rpow__') - - # Python 2 division operators - if not compat.PY3: - __div__ = _sparse_op_wrap(operator.div, 'div') - __rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__') - def __array_wrap__(self, result): """ Gets called prior to a ufunc (and after) @@ -659,5 +638,16 @@ def combine_first(self, other): dense_combined = self.to_dense().combine_first(other) return dense_combined.to_sparse(fill_value=self.fill_value) +# overwrite series methods with unaccelerated versions +ops.add_special_arithmetic_methods(SparseSeries, use_numexpr=False, + **ops.series_special_funcs) +ops.add_flex_arithmetic_methods(SparseSeries, use_numexpr=False, + **ops.series_flex_funcs) +# overwrite basic arithmetic to use SparseSeries version +# force methods to overwrite previous definitions. 
+ops.add_special_arithmetic_methods(SparseSeries, _arith_method, + radd_func=operator.add, comp_method=None, + bool_method=None, use_numexpr=False, force=True) + # backwards compatiblity SparseTimeSeries = SparseSeries diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 56f52447aadfe..85f5ba1f08b1d 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -10,12 +10,16 @@ import numpy as np from numpy.testing import assert_array_equal -from pandas.core.api import DataFrame +from pandas.core.api import DataFrame, Panel from pandas.computation import expressions as expr - -from pandas.util.testing import assert_series_equal, assert_frame_equal from pandas import compat +from pandas.util.testing import (assert_almost_equal, assert_series_equal, + assert_frame_equal, assert_panel_equal, + assert_panel4d_equal) +import pandas.util.testing as tm +from numpy.testing.decorators import slow + if not expr._USE_NUMEXPR: try: @@ -31,6 +35,18 @@ _mixed = DataFrame({ 'A' : _frame['A'].copy(), 'B' : _frame['B'].astype('float32'), 'C' : _frame['C'].astype('int64'), 'D' : _frame['D'].astype('int32') }) _mixed2 = DataFrame({ 'A' : _frame2['A'].copy(), 'B' : _frame2['B'].astype('float32'), 'C' : _frame2['C'].astype('int64'), 'D' : _frame2['D'].astype('int32') }) _integer = DataFrame(np.random.randint(1, 100, size=(10001, 4)), columns = list('ABCD'), dtype='int64') +_integer2 = DataFrame(np.random.randint(1, 100, size=(101, 4)), + columns=list('ABCD'), dtype='int64') +_frame_panel = Panel(dict(ItemA=_frame.copy(), ItemB=(_frame.copy() + 3), ItemC=_frame.copy(), ItemD=_frame.copy())) +_frame2_panel = Panel(dict(ItemA=_frame2.copy(), ItemB=(_frame2.copy() + 3), + ItemC=_frame2.copy(), ItemD=_frame2.copy())) +_integer_panel = Panel(dict(ItemA=_integer, + ItemB=(_integer + 34).astype('int64'))) +_integer2_panel = Panel(dict(ItemA=_integer2, + ItemB=(_integer2 + 34).astype('int64'))) +_mixed_panel = Panel(dict(ItemA=_mixed, ItemB=(_mixed + 3))) +_mixed2_panel = Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3))) + class TestExpressions(unittest.TestCase): @@ -48,20 +64,27 @@ def setUp(self): def tearDown(self): expr._MIN_ELEMENTS = self._MIN_ELEMENTS - #TODO: add test for Panel - #TODO: add tests for binary operations @nose.tools.nottest - def run_arithmetic_test(self, df, assert_func, check_dtype=False): + def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, + test_flex=True): expr._MIN_ELEMENTS = 0 - operations = ['add', 'sub', 'mul','mod','truediv','floordiv','pow'] + operations = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv', 'pow'] if not compat.PY3: operations.append('div') for arith in operations: - op = getattr(operator, arith) + if test_flex: + op = getattr(df, arith) + else: + op = getattr(operator, arith) + if test_flex: + op = lambda x, y: getattr(df, arith)(y) + op.__name__ = arith + else: + op = getattr(operator, arith) expr.set_use_numexpr(False) - expected = op(df, df) + expected = op(df, other) expr.set_use_numexpr(True) - result = op(df, df) + result = op(df, other) try: if check_dtype: if arith == 'div': @@ -74,24 +97,150 @@ def run_arithmetic_test(self, df, assert_func, check_dtype=False): raise def test_integer_arithmetic(self): - self.run_arithmetic_test(self.integer, assert_frame_equal) - self.run_arithmetic_test(self.integer.icol(0), assert_series_equal, - check_dtype=True) + self.run_arithmetic_test(self.integer, self.integer, + assert_frame_equal) + self.run_arithmetic_test(self.integer.icol(0), 
self.integer.icol(0), + assert_series_equal, check_dtype=True) + + @nose.tools.nottest + def run_binary_test(self, df, other, assert_func, check_dtype=False, + test_flex=False, numexpr_ops=set(['gt', 'lt', 'ge', + 'le', 'eq', 'ne'])): + """ + tests solely that the result is the same whether or not numexpr is + enabled. Need to test whether the function does the correct thing + elsewhere. + """ + expr._MIN_ELEMENTS = 0 + expr.set_test_mode(True) + operations = ['gt', 'lt', 'ge', 'le', 'eq', 'ne'] + for arith in operations: + if test_flex: + op = lambda x, y: getattr(df, arith)(y) + op.__name__ = arith + else: + op = getattr(operator, arith) + expr.set_use_numexpr(False) + expected = op(df, other) + expr.set_use_numexpr(True) + expr.get_test_result() + result = op(df, other) + used_numexpr = expr.get_test_result() + try: + if check_dtype: + if arith == 'div': + assert expected.dtype.kind == result.dtype.kind + if arith == 'truediv': + assert result.dtype.kind == 'f' + if arith in numexpr_ops: + assert used_numexpr, "Did not use numexpr as expected." + else: + assert not used_numexpr, "Used numexpr unexpectedly." + assert_func(expected, result) + except Exception: + print("Failed test with operation %r" % arith) + print("test_flex was %r" % test_flex) + raise + + def run_frame(self, df, other, binary_comp=None, run_binary=True, + **kwargs): + self.run_arithmetic_test(df, other, assert_frame_equal, + test_flex=False, **kwargs) + self.run_arithmetic_test(df, other, assert_frame_equal, test_flex=True, + **kwargs) + if run_binary: + if binary_comp is None: + expr.set_use_numexpr(False) + binary_comp = other + 1 + expr.set_use_numexpr(True) + self.run_binary_test(df, binary_comp, assert_frame_equal, + test_flex=False, **kwargs) + self.run_binary_test(df, binary_comp, assert_frame_equal, + test_flex=True, **kwargs) + + def run_series(self, ser, other, binary_comp=None, **kwargs): + self.run_arithmetic_test(ser, other, assert_series_equal, + test_flex=False, **kwargs) + self.run_arithmetic_test(ser, other, assert_almost_equal, + test_flex=True, **kwargs) + # series doesn't uses vec_compare instead of numexpr... 
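        # Sketch of the numexpr bookkeeping that run_binary_test above relies
        # on (assumes the set_test_mode/get_test_result helpers added to
        # pandas.computation.expressions in this patch):
        #
        #   expr.set_test_mode(True)       # start recording
        #   expr.get_test_result()         # drain anything recorded so far
        #   _ = df > other                 # operation under test
        #   used = expr.get_test_result()  # falsy when numexpr was skipped,
        #                                  # which is what the asserts check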
+ # if binary_comp is None: + # binary_comp = other + 1 + # self.run_binary_test(ser, binary_comp, assert_frame_equal, test_flex=False, + # **kwargs) + # self.run_binary_test(ser, binary_comp, assert_frame_equal, test_flex=True, + # **kwargs) + + def run_panel(self, panel, other, binary_comp=None, run_binary=True, + assert_func=assert_panel_equal, **kwargs): + self.run_arithmetic_test(panel, other, assert_func, test_flex=False, + **kwargs) + self.run_arithmetic_test(panel, other, assert_func, test_flex=True, + **kwargs) + if run_binary: + if binary_comp is None: + binary_comp = other + 1 + self.run_binary_test(panel, binary_comp, assert_func, + test_flex=False, **kwargs) + self.run_binary_test(panel, binary_comp, assert_func, + test_flex=True, **kwargs) + + def test_integer_arithmetic_frame(self): + self.run_frame(self.integer, self.integer) + + def test_integer_arithmetic_series(self): + self.run_series(self.integer.icol(0), self.integer.icol(0)) + + @slow + def test_integer_panel(self): + self.run_panel(_integer2_panel, np.random.randint(1, 100)) + + def test_float_arithemtic_frame(self): + self.run_frame(self.frame2, self.frame2) + + def test_float_arithmetic_series(self): + self.run_series(self.frame2.icol(0), self.frame2.icol(0)) + + @slow + def test_float_panel(self): + self.run_panel(_frame2_panel, np.random.randn() + 0.1, binary_comp=0.8) + + @slow + def test_panel4d(self): + self.run_panel(tm.makePanel4D(), np.random.randn() + 0.5, + assert_func=assert_panel4d_equal, binary_comp=3) + + def test_mixed_arithmetic_frame(self): + # TODO: FIGURE OUT HOW TO GET IT TO WORK... + # can't do arithmetic because comparison methods try to do *entire* + # frame instead of by-column + self.run_frame(self.mixed2, self.mixed2, run_binary=False) + + def test_mixed_arithmetic_series(self): + for col in self.mixed2.columns: + self.run_series(self.mixed2[col], self.mixed2[col], binary_comp=4) + + @slow + def test_mixed_panel(self): + self.run_panel(_mixed2_panel, np.random.randint(1, 100), + binary_comp=-2) def test_float_arithemtic(self): - self.run_arithmetic_test(self.frame, assert_frame_equal) - self.run_arithmetic_test(self.frame.icol(0), assert_series_equal, - check_dtype=True) + self.run_arithmetic_test(self.frame, self.frame, assert_frame_equal) + self.run_arithmetic_test(self.frame.icol(0), self.frame.icol(0), + assert_series_equal, check_dtype=True) def test_mixed_arithmetic(self): - self.run_arithmetic_test(self.mixed, assert_frame_equal) + self.run_arithmetic_test(self.mixed, self.mixed, assert_frame_equal) for col in self.mixed.columns: - self.run_arithmetic_test(self.mixed[col], assert_series_equal) + self.run_arithmetic_test(self.mixed[col], self.mixed[col], + assert_series_equal) def test_integer_with_zeros(self): self.integer *= np.random.randint(0, 2, size=np.shape(self.integer)) - self.run_arithmetic_test(self.integer, assert_frame_equal) - self.run_arithmetic_test(self.integer.icol(0), assert_series_equal) + self.run_arithmetic_test(self.integer, self.integer, assert_frame_equal) + self.run_arithmetic_test(self.integer.icol(0), self.integer.icol(0), + assert_series_equal) def test_invalid(self): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 8266502ccdece..a41072d97ddc3 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4554,35 +4554,72 @@ def test_first_last_valid(self): self.assert_(index == frame.index[-6]) def test_arith_flex_frame(self): - ops = ['add', 'sub', 'mul', 'div', 'pow'] - aliases = {'div': 'truediv'} + ops = 
['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] + if not compat.PY3: + aliases = {} + else: + aliases = {'div': 'truediv'} for op in ops: - alias = aliases.get(op, op) - f = getattr(operator, alias) - result = getattr(self.frame, op)(2 * self.frame) - exp = f(self.frame, 2 * self.frame) - assert_frame_equal(result, exp) - - # vs mix float - result = getattr(self.mixed_float, op)(2 * self.mixed_float) - exp = f(self.mixed_float, 2 * self.mixed_float) - assert_frame_equal(result, exp) - _check_mixed_float(result, dtype = dict(C = None)) - - # vs mix int - if op in ['add','sub','mul']: - result = getattr(self.mixed_int, op)(2 + self.mixed_int) - exp = f(self.mixed_int, 2 + self.mixed_int) - - # overflow in the uint - dtype = None - if op in ['sub']: - dtype = dict(B = 'object', C = None) - elif op in ['add','mul']: - dtype = dict(C = None) + try: + alias = aliases.get(op, op) + f = getattr(operator, alias) + result = getattr(self.frame, op)(2 * self.frame) + exp = f(self.frame, 2 * self.frame) + assert_frame_equal(result, exp) + + # vs mix float + result = getattr(self.mixed_float, op)(2 * self.mixed_float) + exp = f(self.mixed_float, 2 * self.mixed_float) assert_frame_equal(result, exp) - _check_mixed_int(result, dtype = dtype) + _check_mixed_float(result, dtype = dict(C = None)) + + # vs mix int + if op in ['add','sub','mul']: + result = getattr(self.mixed_int, op)(2 + self.mixed_int) + exp = f(self.mixed_int, 2 + self.mixed_int) + + # overflow in the uint + dtype = None + if op in ['sub']: + dtype = dict(B = 'object', C = None) + elif op in ['add','mul']: + dtype = dict(C = None) + assert_frame_equal(result, exp) + _check_mixed_int(result, dtype = dtype) + + # rops + r_f = lambda x, y: f(y, x) + result = getattr(self.frame, 'r' + op)(2 * self.frame) + exp = r_f(self.frame, 2 * self.frame) + assert_frame_equal(result, exp) + + # vs mix float + result = getattr(self.mixed_float, op)(2 * self.mixed_float) + exp = f(self.mixed_float, 2 * self.mixed_float) + assert_frame_equal(result, exp) + _check_mixed_float(result, dtype = dict(C = None)) + + result = getattr(self.intframe, op)(2 * self.intframe) + exp = f(self.intframe, 2 * self.intframe) + assert_frame_equal(result, exp) + + # vs mix int + if op in ['add','sub','mul']: + result = getattr(self.mixed_int, op)(2 + self.mixed_int) + exp = f(self.mixed_int, 2 + self.mixed_int) + + # overflow in the uint + dtype = None + if op in ['sub']: + dtype = dict(B = 'object', C = None) + elif op in ['add','mul']: + dtype = dict(C = None) + assert_frame_equal(result, exp) + _check_mixed_int(result, dtype = dtype) + except: + print("Failing operation %r" % op) + raise # ndim >= 3 ndim_5 = np.ones(self.frame.shape + (3, 4, 5)) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 289bcb9db0c7e..5d3f7b350250d 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,8 +1,6 @@ # pylint: disable=W0612,E1101 from datetime import datetime -from pandas.compat import range, lrange, StringIO, cPickle, OrderedDict -from pandas import compat import operator import unittest import nose @@ -16,6 +14,7 @@ from pandas.core.series import remove_na import pandas.core.common as com from pandas import compat +from pandas.compat import range, lrange, StringIO, cPickle, OrderedDict from pandas.util.testing import (assert_panel_equal, assert_frame_equal, @@ -50,7 +49,7 @@ def test_cumsum(self): def not_hashable(self): c_empty = Panel() - c = Panel(pd.Panel([[[1]]])) + c = Panel(Panel([[[1]]])) 
self.assertRaises(TypeError, hash, c_empty) self.assertRaises(TypeError, hash, c) @@ -313,14 +312,32 @@ def check_op(op, name): assert_frame_equal(result.minor_xs(idx), op(self.panel.minor_xs(idx), xs)) + from pandas import SparsePanel + ops = ['add', 'sub', 'mul', 'truediv', 'floordiv'] + if not compat.PY3: + ops.append('div') + # pow, mod not supported for SparsePanel as flex ops (for now) + if not isinstance(self.panel, SparsePanel): + ops.extend(['pow', 'mod']) + else: + idx = self.panel.minor_axis[1] + with assertRaisesRegexp(ValueError, "Simple arithmetic.*scalar"): + self.panel.pow(self.panel.minor_xs(idx), axis='minor') + with assertRaisesRegexp(ValueError, "Simple arithmetic.*scalar"): + self.panel.mod(self.panel.minor_xs(idx), axis='minor') - check_op(operator.add, 'add') - check_op(operator.sub, 'subtract') - check_op(operator.mul, 'multiply') + for op in ops: + try: + check_op(getattr(operator, op), op) + except: + print("Failing operation: %r" % op) + raise if compat.PY3: - check_op(operator.truediv, 'divide') - else: - check_op(operator.div, 'divide') + try: + check_op(operator.truediv, 'div') + except: + print("Failing operation: %r" % name) + raise def test_combinePanel(self): result = self.panel.add(self.panel) @@ -1737,6 +1754,31 @@ def test_operators(self): result = (self.panel + 1).to_panel() assert_frame_equal(wp['ItemA'] + 1, result['ItemA']) + def test_arith_flex_panel(self): + ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] + if not compat.PY3: + aliases = {} + else: + aliases = {'div': 'truediv'} + self.panel = self.panel.to_panel() + n = np.random.randint(-50, 50) + for op in ops: + try: + alias = aliases.get(op, op) + f = getattr(operator, alias) + result = getattr(self.panel, op)(n) + exp = f(self.panel, n) + assert_panel_equal(result, exp, check_panel_type=True) + + # rops + r_f = lambda x, y: f(y, x) + result = getattr(self.panel, 'r' + op)(n) + exp = r_f(self.panel, n) + assert_panel_equal(result, exp) + except: + print("Failing operation %r" % op) + raise + def test_sort(self): def is_sorted(arr): return (arr[1:] > arr[:-1]).any() diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index f8320149f4ac6..479d627e72346 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -269,7 +269,6 @@ class SafeForSparse(object): _ts = tm.makeTimeSeries() - class TestSeries(unittest.TestCase, CheckNameIntegration): _multiprocess_can_split_ = True @@ -1946,21 +1945,27 @@ def test_all_any(self): self.assert_(bool_series.any()) def test_op_method(self): - def _check_op(series, other, op, alt): - result = op(series, other) - expected = alt(series, other) - tm.assert_almost_equal(result, expected) - - def check(series, other): - simple_ops = ['add', 'sub', 'mul'] + def check(series, other, check_reverse=False): + simple_ops = ['add', 'sub', 'mul', 'floordiv', 'truediv', 'pow'] + if not compat.PY3: + simple_ops.append('div') for opname in simple_ops: - _check_op(series, other, getattr(Series, opname), - getattr(operator, opname)) + op = getattr(Series, opname) + alt = getattr(operator, opname) + result = op(series, other) + expected = alt(series, other) + tm.assert_almost_equal(result, expected) + if check_reverse: + rop = getattr(Series, "r" + opname) + result = rop(series, other) + expected = alt(other, series) + tm.assert_almost_equal(result, expected) check(self.ts, self.ts * 2) check(self.ts, self.ts[::2]) - check(self.ts, 5) + check(self.ts, 5, check_reverse=True) + check(tm.makeFloatSeries(), 
tm.makeFloatSeries(), check_reverse=True) def test_neg(self): assert_series_equal(-self.series, -1 * self.series) @@ -2186,13 +2191,18 @@ def test_timedeltas_with_DateOffset(self): s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) result = s + pd.offsets.Second(5) + result2 = pd.offsets.Second(5) + s expected = Series( [Timestamp('20130101 9:01:05'), Timestamp('20130101 9:02:05')]) + assert_series_equal(result, expected) + assert_series_equal(result2, expected) result = s + pd.offsets.Milli(5) + result2 = pd.offsets.Milli(5) + s expected = Series( [Timestamp('20130101 9:01:00.005'), Timestamp('20130101 9:02:00.005')]) assert_series_equal(result, expected) + assert_series_equal(result2, expected) result = s + pd.offsets.Minute(5) + pd.offsets.Milli(5) expected = Series( @@ -2203,20 +2213,25 @@ def test_timedeltas_with_DateOffset(self): # operate with np.timedelta64 correctly result = s + np.timedelta64(1, 's') + result2 = np.timedelta64(1, 's') + s expected = Series( [Timestamp('20130101 9:01:01'), Timestamp('20130101 9:02:01')]) assert_series_equal(result, expected) + assert_series_equal(result2, expected) result = s + np.timedelta64(5, 'ms') + result2 = np.timedelta64(5, 'ms') + s expected = Series( [Timestamp('20130101 9:01:00.005'), Timestamp('20130101 9:02:00.005')]) assert_series_equal(result, expected) + assert_series_equal(result2, expected) # valid DateOffsets for do in [ 'Hour', 'Minute', 'Second', 'Day', 'Micro', 'Milli', 'Nano' ]: op = getattr(pd.offsets,do) s + op(5) + op(5) + s # invalid DateOffsets for do in [ 'Week', 'BDay', 'BQuarterEnd', 'BMonthEnd', 'BYearEnd', @@ -2225,6 +2240,7 @@ def test_timedeltas_with_DateOffset(self): 'MonthBegin', 'QuarterBegin' ]: op = getattr(pd.offsets,do) self.assertRaises(TypeError, s.__add__, op(5)) + self.assertRaises(TypeError, s.__radd__, op(5)) def test_timedelta64_operations_with_timedeltas(self): @@ -2237,6 +2253,11 @@ def test_timedelta64_operations_with_timedeltas(self): self.assert_(result.dtype == 'm8[ns]') assert_series_equal(result, expected) + result2 = td2 - td1 + expected = (Series([timedelta(seconds=1)] * 3) - + Series([timedelta(seconds=0)] * 3)) + assert_series_equal(result2, expected) + # roundtrip assert_series_equal(result + td2,td1) @@ -2318,6 +2339,10 @@ def test_timedelta64_conversions(self): result = s1 / np.timedelta64(m,unit) assert_series_equal(result, expected) + # reverse op + expected = s1.apply(lambda x: np.timedelta64(m,unit) / x) + result = np.timedelta64(m,unit) / s1 + def test_timedelta64_equal_timedelta_supported_ops(self): ser = Series([Timestamp('20130301'), Timestamp('20130228 23:00:00'), Timestamp('20130228 22:00:00'), @@ -2351,44 +2376,58 @@ def timedelta64(*args): def test_operators_datetimelike(self): - # timedelta64 ### - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td2 = timedelta(minutes=5, seconds=4) - for op in ['__mul__', '__floordiv__', '__pow__']: - op = getattr(td1, op, None) - if op is not None: - self.assertRaises(TypeError, op, td2) + def run_ops(ops, get_ser, test_ser): + for op in ops: + try: + op = getattr(get_ser, op, None) + if op is not None: + self.assertRaises(TypeError, op, test_ser) + except: + print("Failed on op %r" % op) + raise + ### timedelta64 ### + td1 = Series([timedelta(minutes=5,seconds=3)]*3) + td2 = timedelta(minutes=5,seconds=4) + ops = ['__mul__','__floordiv__','__pow__', + '__rmul__','__rfloordiv__','__rpow__'] + run_ops(ops, td1, td2) td1 + td2 + td2 + td1 td1 - td2 + td2 - td1 td1 / td2 - - # datetime64 ### - dt1 = Series( - 
[Timestamp('20111230'), Timestamp('20120101'), Timestamp('20120103')]) - dt2 = Series( - [Timestamp('20111231'), Timestamp('20120102'), Timestamp('20120104')]) - for op in ['__add__', '__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__']: - sop = getattr(dt1, op, None) - if sop is not None: - self.assertRaises(TypeError, sop, dt2) + td2 / td1 + + ### datetime64 ### + dt1 = Series([Timestamp('20111230'), Timestamp('20120101'), + Timestamp('20120103')]) + dt2 = Series([Timestamp('20111231'), Timestamp('20120102'), + Timestamp('20120104')]) + ops = ['__add__', '__mul__', '__floordiv__', '__truediv__', '__div__', + '__pow__', '__radd__', '__rmul__', '__rfloordiv__', + '__rtruediv__', '__rdiv__', '__rpow__'] + run_ops(ops, dt1, dt2) dt1 - dt2 + dt2 - dt1 - # datetime64 with timetimedelta ### - for op in ['__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__']: - sop = getattr(dt1, op, None) - if sop is not None: - self.assertRaises(TypeError, sop, td1) + ### datetime64 with timetimedelta ### + ops = ['__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__', + '__rmul__', '__rfloordiv__', '__rtruediv__', '__rdiv__', + '__rpow__'] + run_ops(ops, dt1, td1) dt1 + td1 + td1 + dt1 dt1 - td1 - - # timetimedelta with datetime64 ### - for op in ['__sub__', '__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__']: - sop = getattr(td1, op, None) - if sop is not None: - self.assertRaises(TypeError, sop, dt1) - - # timedelta + datetime ok + # TODO: Decide if this ought to work. + # td1 - dt1 + + ### timetimedelta with datetime64 ### + ops = ['__sub__', '__mul__', '__floordiv__', '__truediv__', '__div__', + '__pow__', '__rsub__', '__rmul__', '__rfloordiv__', + '__rtruediv__', '__rdiv__', '__rpow__'] + run_ops(ops, td1, dt1) td1 + dt1 + dt1 + td1 def test_timedelta64_functions(self): @@ -2517,6 +2556,9 @@ def test_sub_of_datetime_from_TimeSeries(self): result = _possibly_cast_to_timedelta(np.abs(a - b)) self.assert_(result.dtype == 'timedelta64[ns]') + result = _possibly_cast_to_timedelta(np.abs(b - a)) + self.assert_(result.dtype == 'timedelta64[ns]') + def test_datetime64_with_index(self): # arithmetic integer ops with an index @@ -2537,8 +2579,8 @@ def test_datetime64_with_index(self): df = DataFrame(np.random.randn(5,2),index=date_range('20130101',periods=5)) df['date'] = Timestamp('20130102') - df['expected'] = df['date']-df.index.to_series() - df['result'] = df['date']-df.index + df['expected'] = df['date'] - df.index.to_series() + df['result'] = df['date'] - df.index assert_series_equal(df['result'],df['expected']) def test_timedelta64_nan(self): @@ -2586,7 +2628,9 @@ def test_operators_na_handling(self): index=[date(2012, 1, 1), date(2012, 1, 2)]) result = s + s.shift(1) + result2 = s.shift(1) + s self.assert_(isnull(result[0])) + self.assert_(isnull(result2[0])) s = Series(['foo', 'bar', 'baz', np.nan]) result = 'prefix_' + s @@ -2616,7 +2660,7 @@ def test_comparison_operators_with_nas(self): s = Series(bdate_range('1/1/2000', periods=10), dtype=object) s[::2] = np.nan - # test that comparions work + # test that comparisons work ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] for op in ops: val = s[5] @@ -2753,7 +2797,10 @@ def tester(a, b): assert_series_equal(tester(s, list(s)), s) d = DataFrame({'A': s}) - self.assertRaises(TypeError, tester, s, d) + # TODO: Fix this exception - needs to be fixed! 
(see GH5035) + # (previously this was a TypeError because series returned + # NotImplemented + self.assertRaises(ValueError, tester, s, d) def test_idxmin(self): # test idxmin @@ -2942,19 +2989,13 @@ def test_series_frame_radd_bug(self): self.assertRaises(TypeError, operator.add, datetime.now(), self.ts) def test_operators_frame(self): - import sys - buf = StringIO() - tmp = sys.stderr - sys.stderr = buf # rpow does not work with DataFrame - try: - df = DataFrame({'A': self.ts}) + df = DataFrame({'A': self.ts}) - tm.assert_almost_equal(self.ts + self.ts, (self.ts + df)['A']) - tm.assert_almost_equal(self.ts ** self.ts, (self.ts ** df)['A']) - tm.assert_almost_equal(self.ts < self.ts, (self.ts < df)['A']) - finally: - sys.stderr = tmp + tm.assert_almost_equal(self.ts + self.ts, (self.ts + df)['A']) + tm.assert_almost_equal(self.ts ** self.ts, (self.ts ** df)['A']) + tm.assert_almost_equal(self.ts < self.ts, (self.ts < df)['A']) + tm.assert_almost_equal(self.ts / self.ts, (self.ts / df)['A']) def test_operators_combine(self): def _check_fill(meth, op, a, b, fill_value=0): @@ -2987,8 +3028,10 @@ def _check_fill(meth, op, a, b, fill_value=0): a = Series([nan, 1., 2., 3., nan], index=np.arange(5)) b = Series([nan, 1, nan, 3, nan, 4.], index=np.arange(6)) - ops = [Series.add, Series.sub, Series.mul, Series.div] - equivs = [operator.add, operator.sub, operator.mul] + ops = [Series.add, Series.sub, Series.mul, Series.pow, + Series.truediv, Series.div] + equivs = [operator.add, operator.sub, operator.mul, operator.pow, + operator.truediv] if compat.PY3: equivs.append(operator.truediv) else: @@ -3253,9 +3296,12 @@ def test_value_counts_nunique(self): # timedelta64[ns] from datetime import timedelta td = df.dt - df.dt + timedelta(1) + td2 = timedelta(1) + (df.dt - df.dt) result = td.value_counts() + result2 = td2.value_counts() #self.assert_(result.index.dtype == 'timedelta64[ns]') self.assert_(result.index.dtype == 'int64') + self.assert_(result2.index.dtype == 'int64') # basics.rst doc example series = Series(np.random.randn(500)) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 92ed1e415d11a..232ebd2c3726c 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -19,6 +19,10 @@ #---------------------------------------------------------------------- # DateOffset +class ApplyTypeError(TypeError): + # sentinel class for catching the apply error to return NotImplemented + pass + class CacheableOffset(object): @@ -128,7 +132,7 @@ def __repr__(self): kwds_new[key] = self.kwds[key] if len(kwds_new) > 0: attrs.append('='.join((attr, repr(kwds_new)))) - else: + else: if attr not in exclude: attrs.append('='.join((attr, repr(getattr(self, attr))))) @@ -136,7 +140,7 @@ def __repr__(self): plural = 's' else: plural = '' - + n_str = "" if self.n != 1: n_str = "%s * " % self.n @@ -170,19 +174,21 @@ def __call__(self, other): return self.apply(other) def __add__(self, other): - return self.apply(other) + try: + return self.apply(other) + except ApplyTypeError: + return NotImplemented def __radd__(self, other): return self.__add__(other) def __sub__(self, other): if isinstance(other, datetime): - raise TypeError('Cannot subtract datetime from offset!') + raise TypeError('Cannot subtract datetime from offset.') elif type(other) == type(self): return self.__class__(self.n - other.n, **self.kwds) else: # pragma: no cover - raise TypeError('Cannot subtract %s from %s' - % (type(other), type(self))) + return NotImplemented def __rsub__(self, other): return 
self.__class__(-self.n, **self.kwds) + other @@ -273,7 +279,7 @@ def __repr__(self): #TODO: Figure out if this should be merged into DateOffset plural = 's' else: plural = '' - + n_str = "" if self.n != 1: n_str = "%s * " % self.n @@ -370,8 +376,8 @@ def apply(self, other): return BDay(self.n, offset=self.offset + other, normalize=self.normalize) else: - raise TypeError('Only know how to combine business day with ' - 'datetime or timedelta!') + raise ApplyTypeError('Only know how to combine business day with ' + 'datetime or timedelta.') @classmethod def onOffset(cls, dt): @@ -463,8 +469,8 @@ def apply(self, other): return BDay(self.n, offset=self.offset + other, normalize=self.normalize) else: - raise TypeError('Only know how to combine trading day with ' - 'datetime, datetime64 or timedelta!') + raise ApplyTypeError('Only know how to combine trading day with ' + 'datetime, datetime64 or timedelta.') dt64 = self._to_dt64(other) day64 = dt64.astype('datetime64[D]') @@ -1177,7 +1183,10 @@ def __add__(self, other): return type(self)(self.n + other.n) else: return _delta_to_tick(self.delta + other.delta) - return self.apply(other) + try: + return self.apply(other) + except ApplyTypeError: + return NotImplemented def __eq__(self, other): if isinstance(other, compat.string_types): @@ -1220,8 +1229,8 @@ def apply(self, other): return other + self.delta elif isinstance(other, type(self)): return type(self)(self.n + other.n) - else: # pragma: no cover - raise TypeError('Unhandled type: %s' % type(other)) + else: + raise ApplyTypeError('Unhandled type: %s' % type(other).__name__) _rule_base = 'undefined'
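The offsets changes above hinge on the standard Python binary-op protocol: returning
NotImplemented from __add__ lets the interpreter retry with the right-hand operand's
__radd__, which is what makes expressions like pd.offsets.Second(5) + s in the Series
tests resolve to Series arithmetic. A minimal sketch with toy classes (not the pandas
ones):

class ToyOffset(object):
    def __add__(self, other):
        if not hasattr(other, 'toy_apply'):   # stand-in for catching ApplyTypeError
            return NotImplemented             # defer to other.__radd__
        return other.toy_apply(self)

class ToySeries(object):
    def __radd__(self, other):
        return 'handled by ToySeries.__radd__'

print(ToyOffset() + ToySeries())              # -> handled by ToySeries.__radd__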