From 1bc7f15f17bf446caf7b8bfe3f8299c05922ad79 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 26 Jun 2013 21:49:53 -0400 Subject: [PATCH 01/10] CLN: Allow evaluate to take eval_kwargs + testing Now evaluate takes eval_kwargs (generally just truediv) to numexpr.evaluate plus adds a set of testing methods to check that numexpr was actually used successfully. Also changes the if hasattr idiom --> getattr. --- pandas/core/expressions.py | 70 ++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/pandas/core/expressions.py b/pandas/core/expressions.py index abe891b82410c..f553b334ec612 100644 --- a/pandas/core/expressions.py +++ b/pandas/core/expressions.py @@ -13,6 +13,8 @@ except ImportError: # pragma: no cover _NUMEXPR_INSTALLED = False +_TEST_MODE = None +_TEST_RESULT = None _USE_NUMEXPR = _NUMEXPR_INSTALLED _evaluate = None _where = None @@ -53,12 +55,14 @@ def set_numexpr_threads(n = None): def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs): """ standard evaluation """ + if _TEST_MODE: + _store_test_result(False) return op(a,b) def _can_use_numexpr(op, op_str, a, b, dtype_check): """ return a boolean if we WILL be using numexpr """ if op_str is not None: - + # required min elements (otherwise we are adding overhead) if np.prod(a.shape) > _MIN_ELEMENTS: @@ -81,31 +85,30 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs): result = None - if _can_use_numexpr(op, op_str, a, b, 'evaluate'): try: - a_value, b_value = a, b - if hasattr(a_value,'values'): - a_value = a_value.values - if hasattr(b_value,'values'): - b_value = b_value.values - result = ne.evaluate('a_value %s b_value' % op_str, - local_dict={ 'a_value' : a_value, - 'b_value' : b_value }, + a_value = getattr(a, "values", a) + b_value = getattr(b, "values", b) + result = ne.evaluate('a_value %s b_value' % op_str, + local_dict={ 'a_value' : a_value, + 'b_value' : b_value }, casting='safe', **eval_kwargs) - except (ValueError), detail: + except ValueError as detail: if 'unknown type object' in str(detail): pass - except (Exception), detail: + except Exception as detail: if raise_on_error: raise TypeError(str(detail)) + if _TEST_MODE: + _store_test_result(result is not None) + if result is None: result = _evaluate_standard(op,op_str,a,b,raise_on_error) return result -def _where_standard(cond, a, b, raise_on_error=True): +def _where_standard(cond, a, b, raise_on_error=True): return np.where(cond, a, b) def _where_numexpr(cond, a, b, raise_on_error = False): @@ -114,22 +117,18 @@ def _where_numexpr(cond, a, b, raise_on_error = False): if _can_use_numexpr(None, 'where', a, b, 'where'): try: - cond_value, a_value, b_value = cond, a, b - if hasattr(cond_value,'values'): - cond_value = cond_value.values - if hasattr(a_value,'values'): - a_value = a_value.values - if hasattr(b_value,'values'): - b_value = b_value.values + cond_value = getattr(cond, 'values', cond) + a_value = getattr(a, 'values', a) + b_value = getattr(b, 'values', b) result = ne.evaluate('where(cond_value,a_value,b_value)', local_dict={ 'cond_value' : cond_value, - 'a_value' : a_value, - 'b_value' : b_value }, + 'a_value' : a_value, + 'b_value' : b_value }, casting='safe') - except (ValueError), detail: + except ValueError as detail: if 'unknown type object' in str(detail): pass - except (Exception), detail: + except Exception as detail: if raise_on_error: raise TypeError(str(detail)) @@ -156,7 +155,6 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kw otherwise evaluate the op with and return the results use_numexpr : whether to try to use numexpr (default True) """ - if use_numexpr: return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, **eval_kwargs) return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error) @@ -178,3 +176,25 @@ def where(cond, a, b, raise_on_error=False, use_numexpr=True): if use_numexpr: return _where(cond, a, b, raise_on_error=raise_on_error) return _where_standard(cond, a, b, raise_on_error=raise_on_error) + +def set_test_mode(v = True): + """ + Keeps track of whether numexpr was used. Stores an additional ``True`` for + every successful use of evaluate with numexpr since the last + ``get_test_result`` + """ + global _TEST_MODE, _TEST_RESULT + _TEST_MODE = v + _TEST_RESULT = [] + +def _store_test_result(used_numexpr): + global _TEST_RESULT + if used_numexpr: + _TEST_RESULT.append(used_numexpr) + +def get_test_result(): + """get test result and reset test_results""" + global _TEST_RESULT + res = _TEST_RESULT + _TEST_RESULT = [] + return res From dad26b730ac14bc14bc1db381603df0c53233e3f Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 26 Jun 2013 21:58:34 -0400 Subject: [PATCH 02/10] TST: Make test utilities more expressive. Adds a better error for ``isinstance`` checks as well as a better error message for the ``isnull`` case. --- pandas/util/testing.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 19d7c707a0689..b616c86175e39 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -125,6 +125,9 @@ def isiterable(obj): return hasattr(obj, '__iter__') +def assert_isinstance(obj, kind): + assert isinstance(obj, kind), "Expected type %r, saw %r" % (kind, type(obj)) + def assert_almost_equal(a, b, check_less_precise = False): if isinstance(a, dict) or isinstance(b, dict): return assert_dict_equal(a, b) @@ -148,7 +151,7 @@ def assert_almost_equal(a, b, check_less_precise = False): err_msg = lambda a, b: 'expected %.5f but got %.5f' % (b, a) if isnull(a): - np.testing.assert_(isnull(b)) + np.testing.assert_(isnull(b), "Expected null (%r) found %r instead" % (a, b)) return if isinstance(a, (bool, float, int, np.float32)): @@ -222,8 +225,8 @@ def assert_frame_equal(left, right, check_dtype=True, check_names=True): if check_frame_type: assert(type(left) == type(right)) - assert(isinstance(left, DataFrame)) - assert(isinstance(right, DataFrame)) + assert_isinstance(left, DataFrame) + assert_isinstance(right, DataFrame) if check_less_precise: assert_almost_equal(left.columns,right.columns) From d4167d815b824ce59c738a4ceaab52ef20b53e00 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 26 Jun 2013 22:01:57 -0400 Subject: [PATCH 03/10] TST: Test all arithmetic ops + frame default_axis Makes the entire arithmetic test suite explicit as well as sets up test cases to make sure the default_axis responses do not change. --- pandas/tests/test_frame.py | 95 +++++++++++++++++++++++++++---------- pandas/tests/test_panel.py | 2 + pandas/tests/test_series.py | 11 ++--- 3 files changed, 76 insertions(+), 32 deletions(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 60f10c6a919da..92d87b059885b 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4166,35 +4166,47 @@ def test_first_last_valid(self): self.assert_(index == frame.index[-6]) def test_arith_flex_frame(self): - ops = ['add', 'sub', 'mul', 'div', 'pow'] - aliases = {'div': 'truediv'} + ops = ['add', 'sub', 'mul','div', 'truediv', 'pow', 'floordiv', 'mod'] + if not py3compat.PY3: + aliases = {} + else: + aliases = {'div': 'truediv'} for op in ops: - alias = aliases.get(op, op) - f = getattr(operator, alias) - result = getattr(self.frame, op)(2 * self.frame) - exp = f(self.frame, 2 * self.frame) - assert_frame_equal(result, exp) - - # vs mix float - result = getattr(self.mixed_float, op)(2 * self.mixed_float) - exp = f(self.mixed_float, 2 * self.mixed_float) - assert_frame_equal(result, exp) - _check_mixed_float(result, dtype = dict(C = None)) - - # vs mix int - if op in ['add','sub','mul']: - result = getattr(self.mixed_int, op)(2 + self.mixed_int) - exp = f(self.mixed_int, 2 + self.mixed_int) - - # overflow in the uint - dtype = None - if op in ['sub']: - dtype = dict(B = 'object', C = None) - elif op in ['add','mul']: - dtype = dict(C = None) + try: + alias = aliases.get(op, op) + f = getattr(operator, alias) + result = getattr(self.frame, op)(2 * self.frame) + exp = f(self.frame, 2 * self.frame) + assert_frame_equal(result, exp) + + # vs mix float + result = getattr(self.mixed_float, op)(2 * self.mixed_float) + exp = f(self.mixed_float, 2 * self.mixed_float) assert_frame_equal(result, exp) - _check_mixed_int(result, dtype = dtype) + _check_mixed_float(result, dtype = dict(C = None)) + + result = getattr(self.intframe, op)(2 * self.intframe) + exp = f(self.intframe, 2 * self.intframe) + print repr(op), repr(f) + assert_frame_equal(result, exp) + + # vs mix int + if op in ['add','sub','mul']: + result = getattr(self.mixed_int, op)(2 + self.mixed_int) + exp = f(self.mixed_int, 2 + self.mixed_int) + + # overflow in the uint + dtype = None + if op in ['sub']: + dtype = dict(B = 'object', C = None) + elif op in ['add','mul']: + dtype = dict(C = None) + assert_frame_equal(result, exp) + _check_mixed_int(result, dtype = dtype) + except: + print("Failing operation %r" % op) + raise # res_add = self.frame.add(self.frame) # res_sub = self.frame.sub(self.frame) @@ -10590,6 +10602,37 @@ def _check_f(base, f): f = lambda x: x.rename({1: 'foo'}, inplace=True) _check_f(data.copy()['c'], f) + def test_default_axis(self): + # in frame, default axis is `None` for special methods + # and `columns` for flex methods + frame = DataFrame(np.random.randn(5, 7), columns=list("ABCDEFG")) + ops = ['add', 'radd', 'sub', 'rsub', 'div', 'rdiv', 'truediv', 'rtruediv', 'floordiv', 'rfloordiv', 'mul', 'rmul', 'pow', 'rpow', 'mod', 'rmod'] + special_ops = list("__%s__" % op for op in ops) + for flex_name, special_name in zip(ops, special_ops): + try: + try: + flex_meth = getattr(frame, flex_name) + special_meth = getattr(frame, special_name) + except AttributeError: + # for now, ignore if doesn't respond to methods + continue + expected = flex_meth(frame.irow(0), axis=1) + result = flex_meth(frame.irow(0)) + #flex method has right axis + assert_frame_equal(result, expected) + #special method has right axis + result2 = special_meth(frame.irow(0)) + assert_frame_equal(result2, expected) + # finally, test that going against default axis results in Na + flex_result = flex_meth(frame.icol(0)) + assert isnull(flex_result).all().all(), "Flex method should have been all NaN when using column" + flex_result2 = flex_meth(frame.irow(0), axis=0) + assert isnull(flex_result2).all().all(), "With axis 0, flex method should be all NaN when using row" + special_result = special_meth(frame.icol(0)) + assert isnull(special_result).all().all(), "Special method should be all NaN when using column" + except: + print("Failure on %r, %r" % (flex_name, special_name)) + raise if __name__ == '__main__': # unittest.main() diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 4e57977a787f2..f615604dc353c 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -312,6 +312,8 @@ def check_op(op, name): check_op(operator.truediv, 'divide') else: check_op(operator.div, 'divide') + check_op(operator.truediv, 'truediv') + check_op(operator.floordiv, 'floordiv') def test_combinePanel(self): result = self.panel.add(self.panel) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 09f3cc7b61f33..5bf37c919f5cc 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2376,12 +2376,11 @@ def _check_fill(meth, op, a, b, fill_value=0): a = Series([nan, 1., 2., 3., nan], index=np.arange(5)) b = Series([nan, 1, nan, 3, nan, 4.], index=np.arange(6)) - ops = [Series.add, Series.sub, Series.mul, Series.div] - equivs = [operator.add, operator.sub, operator.mul] - if py3compat.PY3: - equivs.append(operator.truediv) - else: - equivs.append(operator.div) + base_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'mod', 'pow'] + if not py3compat.PY3: + base_ops.append('div') + ops = [getattr(Series, op) for op in base_ops] + equivs = [getattr(operator, op) for op in base_ops] fillvals = [0, 0, 1, 1] for op, equiv_op, fv in zip(ops, equivs, fillvals): From b15c22566a0879911e0a17a7f9b5e23518dd232e Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 26 Jun 2013 22:14:52 -0400 Subject: [PATCH 04/10] ENH: Add bind_method to core/common --- pandas/core/common.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 96c567cbb6348..62c654d0b397d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -4,7 +4,7 @@ import itertools import re -from datetime import datetime +import types from numpy.lib.format import read_array, write_array import numpy as np @@ -38,6 +38,30 @@ class AmbiguousIndexError(PandasError, KeyError): pass +def bind_method(cls, name, func): + """Bind a method to class, python 2 and python 3 compatible. + + Parameters + ---------- + + cls : type + class to receive bound method + name : basestring + name of method on class instance + func : function + function to be bound as method + + + Returns + ------- + None + """ + # only python 2 has bound/unbound method issue + if not py3compat.PY3: + setattr(cls, name, types.MethodType(func, None, cls)) + else: + setattr(cls, name, func) + _POSSIBLY_CAST_DTYPES = set([ np.dtype(t) for t in ['M8[ns]','m8[ns]','O','int8','uint8','int16','uint16','int32','uint32','int64','uint64'] ]) _NS_DTYPE = np.dtype('M8[ns]') _TD_DTYPE = np.dtype('m8[ns]') From 6e9eef50ced1b129cae17406e570d5ac07718c74 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 26 Jun 2013 22:15:55 -0400 Subject: [PATCH 05/10] ENH: Add classmethods to generate arithmetic operators in core/generic --- pandas/core/generic.py | 142 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 627a8ab825e5f..1fa4978df1858 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,6 +1,8 @@ # pylint: disable=W0231,E1101 +import operator import numpy as np +from pandas.core.common import bind_method from pandas.core.index import MultiIndex import pandas.core.indexing as indexing @@ -8,6 +10,7 @@ from pandas.tseries.index import DatetimeIndex import pandas.core.common as com import pandas.lib as lib +from pandas.util import py3compat class PandasError(Exception): @@ -52,6 +55,70 @@ def __hash__(self): raise TypeError('{0!r} objects are mutable, thus they cannot be' ' hashed'.format(self.__class__.__name__)) + #---------------------------------------------------------------------- + # Arithmetic! + @classmethod + def _add_special_arithmetic_methods(cls, arith_method=None, radd_func=None, comp_method=None, bool_method=None, + use_numexpr=True): + """ + Adds the full suite of special arithmetic methods (``__add__``, ``__sub__``, etc.) to the class. + + Parameters + ---------- + flex_arith_method : factory for flex arithmetic methods, with op string: + f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) + radd_func : Possible replacement for ``lambda x, y: y + x`` for compatibility + flex_comp_method : optional, factory for rich comparison - signature: f(op, name, str_rep) + use_numexpr : whether to accelerate with numexpr, defaults to True + """ + radd_func = radd_func or operator.add + # in frame, special methods have default_axis = None, comp methods use 'columns' + new_methods = create_methods(arith_method, radd_func, comp_method, bool_method, use_numexpr, default_axis=None, + special=True) + + # inplace operators (I feel like these should get passed an `inplace=True` + # or just be removed + new_methods.update(dict( + __iadd__=new_methods["__add__"], + __isub__=new_methods["__sub__"], + __imul__=new_methods["__mul__"], + __itruediv__=new_methods["__truediv__"], + __ipow__=new_methods["__pow__"] + )) + if not py3compat.PY3: + new_methods["__idiv__"] = new_methods["__div__"] + for name, method in new_methods.items(): + if name not in cls.__dict__: + bind_method(cls, name, method) + + @classmethod + def _add_flex_arithmetic_methods(cls, flex_arith_method, radd_func=None, flex_comp_method=None, + flex_bool_method=None, use_numexpr=True): + """ + Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. + + Parameters + ---------- + flex_arith_method : factory for flex arithmetic methods, with op string: + f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) + radd_func : Possible replacement for ``lambda x, y: y + x`` for compatibility + flex_comp_method : optional, factory for rich comparison - signature: f(op, name, str_rep) + use_numexpr : whether to accelerate with numexpr, defaults to True + """ + radd_func = radd_func or operator.add + # in frame, default axis is 'columns', doesn't matter for series and panel + new_methods = create_methods( + flex_arith_method, radd_func, flex_comp_method, flex_bool_method, + use_numexpr, default_axis='columns', special=False) + new_methods.update(dict( + multiply=new_methods['mul'], + subtract=new_methods['sub'], + divide=new_methods['div'] + )) + + for name, method in new_methods.items(): + if name not in cls.__dict__: + bind_method(cls, name, method) #---------------------------------------------------------------------- # Axis name business @@ -1181,3 +1248,78 @@ def truncate(self, before=None, after=None, copy=True): result = result.copy() return result + + + + +def create_methods(arith_method, radd_func, comp_method, bool_method, use_numexpr, special=False, default_axis='columns'): + # NOTE: Only frame cares about default_axis, specifically: special methods have default axis None, + # whereas flex methods have default axis 'columns' + # if we're not using numexpr, then don't pass a str_rep + if use_numexpr: + op = lambda x: x + else: + op = lambda x: None + if special: + def names(x): + if x[-1] == "_": + return "__%s_" % x + else: + return "__%s__" % x + else: + names = lambda x: x + radd_func = radd_func or operator.add + # Inframe, all special methods have default_axis=None, flex methods have default_axis set to the default (columns) + new_methods = dict( + add=arith_method(operator.add, names('add'), op('+'), default_axis=default_axis), + radd=arith_method(radd_func, names('radd'), op('+'), default_axis=default_axis), + sub=arith_method(operator.sub, names('sub'), op('-'), default_axis=default_axis), + mul=arith_method(operator.mul, names('mul'), op('*'), default_axis=default_axis), + truediv=arith_method(operator.truediv, names('truediv'), op('/'), + truediv=True, fill_zeros=np.inf, default_axis=default_axis), + floordiv=arith_method(operator.floordiv, names('floordiv'), op('//'), + default_axis=default_axis, fill_zeros=np.inf), + # Causes a floating point exception in the tests when numexpr + # enabled, so for now no speedup + mod=arith_method(operator.mod, names('mod'), default_axis=default_axis, + fill_zeros=np.nan), + pow=arith_method(operator.pow, names('pow'), op('**'), default_axis=default_axis), + # not entirely sure why this is necessary, but previously was included + # so it's here to maintain compatibility + rmul=arith_method(operator.mul, names('rmul'), default_axis=default_axis), + rsub=arith_method(lambda x, y: y - x, names('rsub'), default_axis=default_axis), + rtruediv=arith_method(lambda x, y: operator.truediv(y, x), names('rtruediv'), op('/'), + truediv=True, fill_zeros=np.inf, default_axis=default_axis), + rfloordiv=arith_method(lambda x, y: operator.floordiv(y, x), names('rfloordiv'), op('//'), + default_axis=default_axis, fill_zeros=np.inf), + rpow=arith_method(lambda x, y: y ** x, names('rpow'), default_axis=default_axis), + rmod=arith_method(lambda x, y: y % x, names('rmod'), default_axis=default_axis), + ) + if not py3compat.PY3: + new_methods["div"] = arith_method(operator.div, names('div'), op('/'), + truediv=False, fill_zeros=np.inf, default_axis=default_axis) + new_methods["rdiv"] = arith_method(lambda x, y: operator.div(y, x), names('rdiv'), op('/'), + truediv=False, fill_zeros=np.inf, default_axis=default_axis) + else: + new_methods["div"] = arith_method(operator.truediv, names('div'), op('/'), + truediv=True, fill_zeros=np.inf, default_axis=default_axis) + # Comp methods never had a default axis set + if comp_method: + new_methods.update(dict( + eq=comp_method(operator.eq, names('eq'), op('==')), + ne=comp_method(operator.ne, names('ne'), op('!=')), + lt=comp_method(operator.lt, names('lt'), op('<')), + gt=comp_method(operator.gt, names('gt'), op('>')), + le=comp_method(operator.le, names('le'), op('<=')), + ge=comp_method(operator.ge, names('ge'), op('>=')), + )) + if bool_method: + new_methods.update(dict( + and_=bool_method(operator.and_, names('and_ [&]'), op('&')), + or_=bool_method(operator.or_, names('or_ [|]'), op('|')), + # For some reason ``^`` wasn't used in original. + xor=bool_method(operator.xor, names('xor [^]')) + )) + + new_methods = dict((names(k), v) for k, v in new_methods.items()) + return new_methods From 738089ea83559103658ea4461bceca2e11d8111e Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 26 Jun 2013 22:18:26 -0400 Subject: [PATCH 06/10] ENH: Use numexpr + normalize signature throughout. BUG: Fix ``_fill_zeros`` call to work even if TypeError (previously was inconsistent). --- pandas/core/frame.py | 6 +++--- pandas/core/panel.py | 18 ++++++++++++++---- pandas/core/series.py | 44 +++++++++++++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 19 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5f1ea00e421a8..3731057aa3049 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -194,8 +194,6 @@ def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=N def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) - result = com._fill_zeros(result,y,fill_zeros) - except TypeError: xrav = x.ravel() result = np.empty(x.size, dtype=x.dtype) @@ -210,6 +208,8 @@ def na_op(x, y): result, changed = com._maybe_upcast_putmask(result,-mask,np.nan) result = result.reshape(x.shape) + # handles discrepancy between numpy and numexpr on division/mod by 0 + result = com._fill_zeros(result,y,fill_zeros) return result @Appender(_arith_doc % name) @@ -250,7 +250,7 @@ def _flex_comp_method(op, name, str_rep = None, default_axis='columns'): def na_op(x, y): try: - result = op(x, y) + result = expressions.evaluate(op, str_rep, x, y) except TypeError: xrav = x.ravel() result = np.empty(x.size, dtype=x.dtype) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 92f69a7444aab..16ddd4e5c8e06 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -23,6 +23,7 @@ from pandas.util import py3compat from pandas.util.decorators import deprecate, Appender, Substitution import pandas.core.common as com +import pandas.core.expressions as expressions import pandas.core.nanops as nanops import pandas.lib as lib @@ -90,24 +91,33 @@ def panel_index(time, panels, names=['time', 'panel']): return MultiIndex(levels, labels, sortorder=None, names=names) -def _arith_method(func, name): +def _arith_method(op, name, str_rep=None, fill_zeros=None, default_axis=None, **eval_kwargs): # work only for scalars + def na_op(x, y): + try: + result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) + except TypeError: + result = op(x, y) + + # handles discrepancy between numpy and numexpr on division/mod by 0 + result = com._fill_zeros(result,y,fill_zeros) + return result def f(self, other): if not np.isscalar(other): raise ValueError('Simple arithmetic with %s can only be ' 'done with scalar values' % self._constructor.__name__) - return self._combine(other, func) + return self._combine(other, na_op) f.__name__ = name return f -def _comp_method(func, name): +def _comp_method(func, name, str_rep=None): def na_op(x, y): try: - result = func(x, y) + result = expressions.evaluate(func, str_rep, x, y) except TypeError: xrav = x.ravel() result = np.empty(x.size, dtype=x.dtype) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9b11f7c7b0f66..2a1d438e3b4a8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -32,6 +32,7 @@ import pandas.core.common as com import pandas.core.datetools as datetools import pandas.core.format as fmt +import pandas.core.expressions as expressions import pandas.core.generic as generic import pandas.core.nanops as nanops from pandas.util.decorators import Appender, Substitution, cache_readonly @@ -55,17 +56,14 @@ # Wrapper function for Series arithmetic methods -def _arith_method(op, name, fill_zeros=None): +def _arith_method(op, name, str_rep=None, fill_zeros=None, default_axis=None, **eval_kwargs): """ Wrapper function for Series arithmetic operations, to avoid code duplication. """ def na_op(x, y): try: - - result = op(x, y) - result = com._fill_zeros(result,y,fill_zeros) - + result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) except TypeError: result = pa.empty(len(x), dtype=x.dtype) if isinstance(y, pa.Array): @@ -77,6 +75,8 @@ def na_op(x, y): result, changed = com._maybe_upcast_putmask(result,-mask,pa.NA) + # handles discrepancy between numpy and numexpr on division/mod by 0 + result = com._fill_zeros(result,y,fill_zeros) return result def wrapper(self, other, name=name): @@ -184,10 +184,12 @@ def wrap_results(x): lvalues = lvalues.values return Series(wrap_results(na_op(lvalues, rvalues)), index=self.index, name=self.name, dtype=dtype) + wrapper.__name__ = name return wrapper - -def _comp_method(op, name): +# Would it make sense to have this use numexpr instead? +# Should frame use vec_compare? +def _comp_method(op, name, str_rep=None): """ Wrapper function for Series arithmetic operations, to avoid code duplication. @@ -239,10 +241,11 @@ def wrapper(self, other): % type(other)) return Series(na_op(values, other), index=self.index, name=self.name) + wrapper.__name__ = name return wrapper -def _bool_method(op, name): +def _bool_method(op, name, str_rep=None): """ Wrapper function for Series arithmetic operations, to avoid code duplication. @@ -280,6 +283,7 @@ def wrapper(self, other): # scalars return Series(na_op(self.values, other), index=self.index, name=self.name) + wrapper.__name__ = name return wrapper @@ -306,8 +310,7 @@ def _maybe_match_name(a, b): name = a.name return name - -def _flex_method(op, name): +def _flex_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, **eval_kwargs): doc = """ Binary operator %s with support to substitute a fill_value for missing data in one of the inputs @@ -326,18 +329,35 @@ def _flex_method(op, name): ------- result : Series """ % name + # copied directly from _arith_method above...we'll see whether this works + def na_op(x, y): + try: + result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) + except TypeError: + result = pa.empty(len(x), dtype=x.dtype) + if isinstance(y, pa.Array): + mask = notnull(x) & notnull(y) + result[mask] = op(x[mask], y[mask]) + else: + mask = notnull(x) + result[mask] = op(x[mask], y) + + result, changed = com._maybe_upcast_putmask(result,-mask,pa.NA) + # handles discrepancy between numpy and numexpr on division/mod by 0 + result = com._fill_zeros(result,y,fill_zeros) + return result @Appender(doc) def f(self, other, level=None, fill_value=None): if isinstance(other, Series): - return self._binop(other, op, level=level, fill_value=fill_value) + return self._binop(other, na_op, level=level, fill_value=fill_value) elif isinstance(other, (pa.Array, list, tuple)): if len(other) != len(self): raise ValueError('Lengths must be equal') return self._binop(Series(other, self.index), op, level=level, fill_value=fill_value) else: - return Series(op(self.values, other), self.index, + return Series(na_op(self.values, other), self.index, name=self.name) f.__name__ = name From 5793a04bb9377786b35bedb8d3a6aeef00b21a5f Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 26 Jun 2013 22:21:09 -0400 Subject: [PATCH 07/10] ENH: Abstract arithmetic into core/generic SparsePanel has to opt-out because it doesn't respond to ``shape``. ENH: Add flex comparison methods to Series and Panel --- pandas/core/frame.py | 77 +++--------------------------------------- pandas/core/panel.py | 66 ++++++++++-------------------------- pandas/core/series.py | 62 ++++------------------------------ pandas/sparse/panel.py | 3 +- 4 files changed, 30 insertions(+), 178 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3731057aa3049..fcade2b3700fe 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -834,63 +834,6 @@ def __contains__(self, key): """True if DataFrame has this column""" return key in self.columns - #---------------------------------------------------------------------- - # Arithmetic methods - - add = _arith_method(operator.add, 'add', '+') - mul = _arith_method(operator.mul, 'multiply', '*') - sub = _arith_method(operator.sub, 'subtract', '-') - div = divide = _arith_method(lambda x, y: x / y, 'divide', '/') - pow = _arith_method(operator.pow, 'pow', '**') - mod = _arith_method(lambda x, y: x % y, 'mod') - - radd = _arith_method(_radd_compat, 'radd') - rmul = _arith_method(operator.mul, 'rmultiply') - rsub = _arith_method(lambda x, y: y - x, 'rsubtract') - rdiv = _arith_method(lambda x, y: y / x, 'rdivide') - rpow = _arith_method(lambda x, y: y ** x, 'rpow') - rmod = _arith_method(lambda x, y: y % x, 'rmod') - - __add__ = _arith_method(operator.add, '__add__', '+', default_axis=None) - __sub__ = _arith_method(operator.sub, '__sub__', '-', default_axis=None) - __mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None) - __truediv__ = _arith_method(operator.truediv, '__truediv__', '/', - default_axis=None, fill_zeros=np.inf, truediv=True) - # numexpr produces a different value (python/numpy: 0.000, numexpr: inf) - # when dividing by zero, so can't use floordiv speed up (yet) - # __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', '//', - __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', - default_axis=None, fill_zeros=np.inf) - __pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None) - - # currently causes a floating point exception to occur - so sticking with unaccelerated for now - # __mod__ = _arith_method(operator.mod, '__mod__', '%', default_axis=None, fill_zeros=np.nan) - __mod__ = _arith_method(operator.mod, '__mod__', default_axis=None, fill_zeros=np.nan) - - __radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None) - __rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None) - __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__', default_axis=None) - __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__', - default_axis=None, fill_zeros=np.inf) - __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__', - default_axis=None, fill_zeros=np.inf) - __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__', - default_axis=None) - __rmod__ = _arith_method(lambda x, y: y % x, '__rmod__', default_axis=None, - fill_zeros=np.nan) - - # boolean operators - __and__ = _arith_method(operator.and_, '__and__', '&') - __or__ = _arith_method(operator.or_, '__or__', '|') - __xor__ = _arith_method(operator.xor, '__xor__') - - # Python 2 division methods - if not py3compat.PY3: - __div__ = _arith_method(operator.div, '__div__', '/', - default_axis=None, fill_zeros=np.inf, truediv=False) - __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', - default_axis=None, fill_zeros=np.inf) - def __neg__(self): arr = operator.neg(self.values) return self._wrap_array(arr, self.axes, copy=False) @@ -899,21 +842,6 @@ def __invert__(self): arr = operator.inv(self.values) return self._wrap_array(arr, self.axes, copy=False) - # Comparison methods - __eq__ = _comp_method(operator.eq, '__eq__', '==') - __ne__ = _comp_method(operator.ne, '__ne__', '!=') - __lt__ = _comp_method(operator.lt, '__lt__', '<' ) - __gt__ = _comp_method(operator.gt, '__gt__', '>' ) - __le__ = _comp_method(operator.le, '__le__', '<=') - __ge__ = _comp_method(operator.ge, '__ge__', '>=') - - eq = _flex_comp_method(operator.eq, 'eq', '==') - ne = _flex_comp_method(operator.ne, 'ne', '!=') - lt = _flex_comp_method(operator.lt, 'lt', '<') - gt = _flex_comp_method(operator.gt, 'gt', '>') - le = _flex_comp_method(operator.le, 'le', '<=') - ge = _flex_comp_method(operator.ge, 'ge', '>=') - def dot(self, other): """ Matrix multiplication with DataFrame or Series objects @@ -6029,7 +5957,10 @@ def boxplot(self, column=None, by=None, ax=None, fontsize=None, return ax DataFrame.boxplot = boxplot - +DataFrame._add_flex_arithmetic_methods(_arith_method, radd_func=_radd_compat, + flex_comp_method=_flex_comp_method) +DataFrame._add_special_arithmetic_methods(_arith_method, radd_func=_radd_compat, + comp_method=_comp_method, bool_method=_arith_method) if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 16ddd4e5c8e06..3de9c9a814f02 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -223,24 +223,6 @@ def _construct_axes_dict_for_slice(self, axes=None, **kwargs): d.update(kwargs) return d - __add__ = _arith_method(operator.add, '__add__') - __sub__ = _arith_method(operator.sub, '__sub__') - __truediv__ = _arith_method(operator.truediv, '__truediv__') - __floordiv__ = _arith_method(operator.floordiv, '__floordiv__') - __mul__ = _arith_method(operator.mul, '__mul__') - __pow__ = _arith_method(operator.pow, '__pow__') - - __radd__ = _arith_method(operator.add, '__radd__') - __rmul__ = _arith_method(operator.mul, '__rmul__') - __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__') - __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__') - __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__') - __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__') - - if not py3compat.PY3: - __div__ = _arith_method(operator.div, '__div__') - __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__') - def __init__(self, data=None, items=None, major_axis=None, minor_axis=None, copy=False, dtype=None): self._init_data( @@ -458,21 +440,6 @@ def __neg__(self): def __invert__(self): return -1 * self - # Comparison methods - __eq__ = _comp_method(operator.eq, '__eq__') - __ne__ = _comp_method(operator.ne, '__ne__') - __lt__ = _comp_method(operator.lt, '__lt__') - __gt__ = _comp_method(operator.gt, '__gt__') - __le__ = _comp_method(operator.le, '__le__') - __ge__ = _comp_method(operator.ge, '__ge__') - - eq = _comp_method(operator.eq, 'eq') - ne = _comp_method(operator.ne, 'ne') - gt = _comp_method(operator.gt, 'gt') - lt = _comp_method(operator.lt, 'lt') - ge = _comp_method(operator.ge, 'ge') - le = _comp_method(operator.le, 'le') - #---------------------------------------------------------------------- # Magic methods @@ -1621,7 +1588,7 @@ def _extract_axis(self, data, axis=0, intersect=False): return _ensure_index(index) @classmethod - def _add_aggregate_operations(cls): + def _add_aggregate_operations(cls, use_numexpr=True): """ add the operations to the cls; evaluate the doc strings again """ # doc strings substitors @@ -1638,25 +1605,27 @@ def _add_aggregate_operations(cls): ------- """ + cls.__name__ + "\n" - def _panel_arith_method(op, name): + def _panel_arith_method(op, name, str_rep = None, default_axis=None, fill_zeros=None, **eval_kwargs): + def na_op(x, y): + try: + result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) + except TypeError: + result = op(x, y) + + # handles discrepancy between numpy and numexpr on division/mod by 0 + # though, given that these are generally (always?) non-scalars, I'm + # not sure whether it's worth it at the moment + result = com._fill_zeros(result,y,fill_zeros) + return result @Substitution(op) @Appender(_agg_doc) def f(self, other, axis=0): - return self._combine(other, op, axis=axis) + return self._combine(other, na_op, axis=axis) f.__name__ = name return f - - cls.add = _panel_arith_method(operator.add, 'add') - cls.subtract = cls.sub = _panel_arith_method(operator.sub, 'subtract') - cls.multiply = cls.mul = _panel_arith_method(operator.mul, 'multiply') - - try: - cls.divide = cls.div = _panel_arith_method(operator.div, 'divide') - except AttributeError: # pragma: no cover - # Python 3 - cls.divide = cls.div = _panel_arith_method( - operator.truediv, 'divide') - + # add `div`, `mul`, `pow`, etc.. + cls._add_flex_arithmetic_methods(_panel_arith_method, + use_numexpr=use_numexpr, flex_comp_method=_comp_method) _agg_doc = """ Return %(desc)s over requested axis @@ -1737,6 +1706,7 @@ def min(self, axis='major', skipna=True): return self._reduce(nanops.nanmin, axis=axis, skipna=skipna) cls.min = min +Panel._add_special_arithmetic_methods(_arith_method, comp_method=_comp_method) Panel._add_aggregate_operations() WidePanel = Panel diff --git a/pandas/core/series.py b/pandas/core/series.py index 2a1d438e3b4a8..99e844088b190 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1269,33 +1269,6 @@ def iteritems(self): if py3compat.PY3: # pragma: no cover items = iteritems - #---------------------------------------------------------------------- - # Arithmetic operators - - __add__ = _arith_method(operator.add, '__add__') - __sub__ = _arith_method(operator.sub, '__sub__') - __mul__ = _arith_method(operator.mul, '__mul__') - __truediv__ = _arith_method(operator.truediv, '__truediv__', fill_zeros=np.inf) - __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', fill_zeros=np.inf) - __pow__ = _arith_method(operator.pow, '__pow__') - __mod__ = _arith_method(operator.mod, '__mod__', fill_zeros=np.nan) - - __radd__ = _arith_method(_radd_compat, '__add__') - __rmul__ = _arith_method(operator.mul, '__mul__') - __rsub__ = _arith_method(lambda x, y: y - x, '__sub__') - __rtruediv__ = _arith_method(lambda x, y: y / x, '__truediv__', fill_zeros=np.inf) - __rfloordiv__ = _arith_method(lambda x, y: y // x, '__floordiv__', fill_zeros=np.inf) - __rpow__ = _arith_method(lambda x, y: y ** x, '__pow__') - __rmod__ = _arith_method(lambda x, y: y % x, '__mod__', fill_zeros=np.nan) - - # comparisons - __gt__ = _comp_method(operator.gt, '__gt__') - __ge__ = _comp_method(operator.ge, '__ge__') - __lt__ = _comp_method(operator.lt, '__lt__') - __le__ = _comp_method(operator.le, '__le__') - __eq__ = _comp_method(operator.eq, '__eq__') - __ne__ = _comp_method(operator.ne, '__ne__') - # inversion def __neg__(self): arr = operator.neg(self.values) @@ -1305,25 +1278,6 @@ def __invert__(self): arr = operator.inv(self.values) return Series(arr, self.index, name=self.name) - # binary logic - __or__ = _bool_method(operator.or_, '__or__') - __and__ = _bool_method(operator.and_, '__and__') - __xor__ = _bool_method(operator.xor, '__xor__') - - # Inplace operators - __iadd__ = __add__ - __isub__ = __sub__ - __imul__ = __mul__ - __itruediv__ = __truediv__ - __ifloordiv__ = __floordiv__ - __ipow__ = __pow__ - - # Python 2 division operators - if not py3compat.PY3: - __div__ = _arith_method(operator.div, '__div__', fill_zeros=np.inf) - __rdiv__ = _arith_method(lambda x, y: y / x, '__div__', fill_zeros=np.inf) - __idiv__ = __div__ - #---------------------------------------------------------------------- # unbox reductions @@ -2139,16 +2093,6 @@ def _binop(self, other, func, level=None, fill_value=None): name = _maybe_match_name(self, other) return Series(result, index=new_index, name=name) - add = _flex_method(operator.add, 'add') - sub = _flex_method(operator.sub, 'subtract') - mul = _flex_method(operator.mul, 'multiply') - try: - div = _flex_method(operator.div, 'divide') - except AttributeError: # pragma: no cover - # Python 3 - div = _flex_method(operator.truediv, 'divide') - mod = _flex_method(operator.mod, 'mod') - def combine(self, other, func, fill_value=nan): """ Perform elementwise binary operation on two Series using given function @@ -3590,3 +3534,9 @@ def to_period(self, freq=None, copy=True): freq = self.index.freqstr or self.index.inferred_freq new_index = self.index.to_period(freq=freq) return Series(new_values, index=new_index, name=self.name) + +# Add arithmetic! +Series._add_flex_arithmetic_methods(_flex_method, radd_func=_radd_compat, + flex_comp_method=_comp_method) +Series._add_special_arithmetic_methods(_arith_method, radd_func=_radd_compat, comp_method=_comp_method, + bool_method=_bool_method) diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index 246e6fa93918f..f39cb16ad7a67 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -445,7 +445,8 @@ def minor_xs(self, key): columns=self.items, default_fill_value=self.default_fill_value, default_kind=self.default_kind) - +# need to redo the aggregate funcs because SparsePanel doesn't respond to shape, etc. +SparsePanel._add_aggregate_operations(use_numexpr=False) SparseWidePanel = SparsePanel From 75722308c21db1c2a186ab737d3dd6f7c14bf689 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 26 Jun 2013 22:25:09 -0400 Subject: [PATCH 08/10] CLN: PEP8 the tests/test_expressions --- pandas/tests/test_expressions.py | 292 +++++++++++++++++++++++-------- 1 file changed, 215 insertions(+), 77 deletions(-) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index ba0a9926dfa78..3bd5a83053ec8 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -1,137 +1,275 @@ # pylint: disable-msg=W0612,E1101 - import unittest -import nose - import operator -from numpy import random, nan -from numpy.random import randn + +import nose import numpy as np from numpy.testing import assert_array_equal +from numpy.testing.decorators import slow -import pandas as pan -from pandas.core.api import DataFrame, Series, notnull, isnull +from pandas.core.api import Panel, DataFrame from pandas.core import expressions as expr - from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal) -from pandas.util import py3compat - + assert_frame_equal, + assert_panel_equal, + assert_panel4d_equal) import pandas.util.testing as tm -import pandas.lib as lib +from pandas.util import py3compat -from numpy.testing.decorators import slow if not expr._USE_NUMEXPR: raise nose.SkipTest -_frame = DataFrame(np.random.randn(10000, 4), columns = list('ABCD'), dtype='float64') -_frame2 = DataFrame(np.random.randn(100, 4), columns = list('ABCD'), dtype='float64') -_mixed = DataFrame({ 'A' : _frame['A'].copy(), 'B' : _frame['B'].astype('float32'), 'C' : _frame['C'].astype('int64'), 'D' : _frame['D'].astype('int32') }) -_mixed2 = DataFrame({ 'A' : _frame2['A'].copy(), 'B' : _frame2['B'].astype('float32'), 'C' : _frame2['C'].astype('int64'), 'D' : _frame2['D'].astype('int32') }) -_integer = DataFrame(np.random.randint(1, 100, size=(10001, 4)), columns = list('ABCD'), dtype='int64') +_frame = DataFrame(np.random.randn(10000, 4), columns=list('ABCD'), dtype='float64') +_frame2 = DataFrame(np.random.randn(100, 4), columns=list('ABCD'), dtype='float64') +_mixed = DataFrame({'A': _frame['A'].copy(), 'B': _frame['B'].astype('float32'), 'C': _frame['C'].astype('int64'), + 'D': _frame['D'].astype('int32')}) +_mixed2 = DataFrame({'A': _frame2['A'].copy(), 'B': _frame2['B'].astype('float32'), 'C': _frame2['C'].astype('int64'), + 'D': _frame2['D'].astype('int32')}) +_integer = DataFrame(np.random.randint(1, 100, size=(10001, 4)), columns=list('ABCD'), dtype='int64') +_integer2 = DataFrame(np.random.randint(1, 100, size=(101, 4)), columns=list('ABCD'), dtype='int64') +_frame_panel = Panel(dict(ItemA=_frame.copy(), ItemB=(_frame.copy() + 3), ItemC=_frame.copy(), ItemD=_frame.copy())) +_frame2_panel = Panel( + dict(ItemA=_frame2.copy(), ItemB=(_frame2.copy() + 3), ItemC=_frame2.copy(), ItemD=_frame2.copy())) +_integer_panel = Panel(dict(ItemA=_integer, ItemB=(_integer + 34).astype('int64'))) +_integer2_panel = Panel(dict(ItemA=_integer2, ItemB=(_integer2 + 34).astype('int64'))) +_mixed_panel = Panel(dict(ItemA=_mixed, ItemB=(_mixed + 3))) +_mixed2_panel = Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3))) -class TestExpressions(unittest.TestCase): +class TestExpressions(unittest.TestCase): _multiprocess_can_split_ = False def setUp(self): - self.frame = _frame.copy() + self.frame = _frame.copy() self.frame2 = _frame2.copy() - self.mixed = _mixed.copy() + self.mixed = _mixed.copy() self.mixed2 = _mixed2.copy() self.integer = _integer.copy() self._MIN_ELEMENTS = expr._MIN_ELEMENTS def tearDown(self): expr._MIN_ELEMENTS = self._MIN_ELEMENTS + expr.set_test_mode(False) - #TODO: add test for Panel #TODO: add tests for binary operations @nose.tools.nottest - def run_arithmetic_test(self, df, assert_func, check_dtype=False): + def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, + test_flex=False, numexpr_ops=set(('add', 'sub', 'mul', 'truediv', + 'div', 'pow'))): + """ + tests solely that the result is the same whether or not numexpr is enabled. + Need to test whether the function does the correct thing elsewhere. + """ expr._MIN_ELEMENTS = 0 - operations = ['add', 'sub', 'mul','mod','truediv','floordiv','pow'] + expr.set_test_mode(True) + operations = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv', 'pow'] if not py3compat.PY3: operations.append('div') for arith in operations: - op = getattr(operator, arith) + if test_flex: + op = getattr(df, arith) + else: + op = getattr(operator, arith) expr.set_use_numexpr(False) - expected = op(df, df) + expected = op(df, other) expr.set_use_numexpr(True) - result = op(df, df) + expr.get_test_result() + result = op(df, other) + used_numexpr = expr.get_test_result() try: if check_dtype: if arith == 'div': - assert expected.dtype.kind == df.dtype.kind + assert expected.dtype.kind == result.dtype.kind if arith == 'truediv': - assert expected.dtype.kind == 'f' + assert result.dtype.kind == 'f' + if arith in numexpr_ops: + assert used_numexpr, "Did not use numexpr as expected." + else: + assert not used_numexpr, "Used numexpr unexpectedly." assert_func(expected, result) except Exception: - print("Failed test with operator %r" % op.__name__) + print("Failed test with func %r" % op) + print("test_flex was %r" % test_flex) raise - def test_integer_arithmetic(self): - self.run_arithmetic_test(self.integer, assert_frame_equal) - self.run_arithmetic_test(self.integer.icol(0), assert_series_equal, - check_dtype=True) - - def test_float_arithemtic(self): - self.run_arithmetic_test(self.frame, assert_frame_equal) - self.run_arithmetic_test(self.frame.icol(0), assert_series_equal, - check_dtype=True) - - def test_mixed_arithmetic(self): - self.run_arithmetic_test(self.mixed, assert_frame_equal) - for col in self.mixed.columns: - self.run_arithmetic_test(self.mixed[col], assert_series_equal) + @nose.tools.nottest + def run_binary_test(self, df, other, assert_func, check_dtype=False, + test_flex=False, numexpr_ops=set(['gt', 'lt', 'ge', 'le', 'eq', 'ne'])): + """ + tests solely that the result is the same whether or not numexpr is enabled. + Need to test whether the function does the correct thing elsewhere. + """ + expr._MIN_ELEMENTS = 0 + expr.set_test_mode(True) + operations = ['gt', 'lt', 'ge', 'le', 'eq', 'ne'] + found_error = False + print "Running binary test with other - ", type(other) + for arith in operations: + if test_flex: + op = getattr(df, arith) + else: + op = getattr(operator, arith) + expr.set_use_numexpr(False) + expected = op(df, other) + expr.set_use_numexpr(True) + expr.get_test_result() + result = op(df, other) + used_numexpr = expr.get_test_result() + try: + if check_dtype: + if arith == 'div': + assert expected.dtype.kind == result.dtype.kind + if arith == 'truediv': + assert result.dtype.kind == 'f' + if arith in numexpr_ops: + assert used_numexpr, "Did not use numexpr as expected." + else: + assert not used_numexpr, "Used numexpr unexpectedly." + assert_func(expected, result) + except Exception as e: + print("-----------Failed test with func %r" % op) + print("-----------test_flex was %r" % test_flex) + print "-----------", e + found_error = True + if found_error: + raise AssertionError("found an error") + + def run_frame(self, df, other, binary_comp=None, run_binary=True, **kwargs): + self.run_arithmetic_test(df, other, assert_frame_equal, test_flex=False, + **kwargs) + self.run_arithmetic_test(df, other, assert_frame_equal, test_flex=True, + **kwargs) + if run_binary: + print repr(binary_comp) + if binary_comp is None: + expr.set_use_numexpr(False) + binary_comp = other + 1 + expr.set_use_numexpr(True) + self.run_binary_test(df, binary_comp, assert_frame_equal, test_flex=False, + **kwargs) + self.run_binary_test(df, binary_comp, assert_frame_equal, test_flex=True, + **kwargs) + + def run_series(self, ser, other, binary_comp=None, **kwargs): + self.run_arithmetic_test(ser, other, assert_series_equal, test_flex=False, **kwargs) + self.run_arithmetic_test(ser, other, assert_almost_equal, test_flex=True, **kwargs) + # series doesn't uses vec_compare instead of numexpr... + # if binary_comp is None: + # binary_comp = other + 1 + # self.run_binary_test(ser, binary_comp, assert_frame_equal, test_flex=False, + # **kwargs) + # self.run_binary_test(ser, binary_comp, assert_frame_equal, test_flex=True, + # **kwargs) + + def run_panel(self, panel, other, binary_comp=None, run_binary=True, + assert_func=assert_panel_equal, **kwargs): + self.run_arithmetic_test(panel, other, assert_func, test_flex=False, **kwargs) + self.run_arithmetic_test(panel, other, assert_func, test_flex=True, **kwargs) + if run_binary: + if binary_comp is None: + binary_comp = other + 1 + self.run_binary_test(panel, binary_comp, assert_func, + test_flex=False, **kwargs) + # Panel doesn't support flex comparison methods... + # self.run_binary_test(panel, binary_comp, assert_panel_equal, + # test_flex=True, **kwargs) + + def test_integer_arithmetic_frame(self): + self.run_frame(self.integer, self.integer) + + def test_integer_arithmetic_series(self): + self.run_series(self.integer.icol(0), self.integer.icol(0)) + + @slow + def test_integer_panel(self): + self.run_panel(_integer2_panel, np.random.randint(1, 100)) + + def test_float_arithemtic_frame(self): + self.run_frame(self.frame2, self.frame2) + + def test_float_arithmetic_series(self): + self.run_series(self.frame2.icol(0), self.frame2.icol(0)) + + @slow + def test_float_panel(self): + self.run_panel(_frame2_panel, np.random.randn() + 0.1, binary_comp=0.8) + + @slow + def test_panel4d(self): + self.run_panel(tm.makePanel4D(), np.random.randn() + 0.5, + assert_func=assert_panel4d_equal, binary_comp=3) + + def test_mixed_arithmetic_frame(self): + # TODO: FIGURE OUT HOW TO GET IT TO WORK... + # can't do arithmetic because comparison methods try to do *entire* + # frame instead of by-column + self.run_frame(self.mixed2, self.mixed2, run_binary=False) + + def test_mixed_arithmetic_series(self): + for col in self.mixed2.columns: + self.run_series(self.mixed2[col], self.mixed2[col], binary_comp=4) + + @slow + def test_mixed_panel(self): + self.run_panel(_mixed2_panel, np.random.randint(1, 100), binary_comp=-2) def test_integer_with_zeros(self): - self.integer *= np.random.randint(0, 2, size=np.shape(self.integer)) - self.run_arithmetic_test(self.integer, assert_frame_equal) - self.run_arithmetic_test(self.integer.icol(0), assert_series_equal) + integer = _integer2 * np.random.randint(0, 2, + size=np.shape(_integer2)) + self.run_frame(integer, integer) + self.run_series(integer.icol(0), integer.icol(0), binary_comp=0) + + @slow + def test_integer_panel_with_zeros(self): + # this probably isn't the greatest test, but whatever + self.run_panel(_mixed2_panel, 0, binary_comp=1) def test_invalid(self): # no op - result = expr._can_use_numexpr(operator.add, None, self.frame, self.frame, 'evaluate') + result = expr._can_use_numexpr(operator.add, None, self.frame, self.frame, 'evaluate') self.assert_(result == False) # mixed - result = expr._can_use_numexpr(operator.add, '+', self.mixed, self.frame, 'evaluate') + result = expr._can_use_numexpr(operator.add, '+', self.mixed, self.frame, 'evaluate') self.assert_(result == False) # min elements - result = expr._can_use_numexpr(operator.add, '+', self.frame2, self.frame2, 'evaluate') + result = expr._can_use_numexpr(operator.add, '+', self.frame2, self.frame2, 'evaluate') self.assert_(result == False) # ok, we only check on first part of expression - result = expr._can_use_numexpr(operator.add, '+', self.frame, self.frame2, 'evaluate') + result = expr._can_use_numexpr(operator.add, '+', self.frame, self.frame2, 'evaluate') + self.assert_(result == True) + + result = expr._can_use_numexpr(operator.add, '+', _frame_panel, + 3, 'evaluate') self.assert_(result == True) def test_binary_ops(self): def testit(): - for f, f2 in [ (self.frame, self.frame2), (self.mixed, self.mixed2) ]: + for f, f2 in [(self.frame, self.frame2), (self.mixed, self.mixed2)]: - for op, op_str in [('add','+'),('sub','-'),('mul','*'),('div','/'),('pow','**')]: + for op, op_str in [('add', '+'), ('sub', '-'), ('mul', '*'), ('div', '/'), ('pow', '**')]: - op = getattr(operator,op,None) + op = getattr(operator, op, None) if op is not None: - result = expr._can_use_numexpr(op, op_str, f, f, 'evaluate') + result = expr._can_use_numexpr(op, op_str, f, f, 'evaluate') self.assert_(result == (not f._is_mixed_type)) - result = expr.evaluate(op, op_str, f, f, use_numexpr=True) + result = expr.evaluate(op, op_str, f, f, use_numexpr=True) expected = expr.evaluate(op, op_str, f, f, use_numexpr=False) - assert_array_equal(result,expected.values) - - result = expr._can_use_numexpr(op, op_str, f2, f2, 'evaluate') + assert_array_equal(result, expected.values) + + result = expr._can_use_numexpr(op, op_str, f2, f2, 'evaluate') self.assert_(result == False) - + expr.set_use_numexpr(False) testit() expr.set_use_numexpr(True) @@ -144,26 +282,25 @@ def test_boolean_ops(self): def testit(): - for f, f2 in [ (self.frame, self.frame2), (self.mixed, self.mixed2) ]: + for f, f2 in [(self.frame, self.frame2), (self.mixed, self.mixed2)]: f11 = f f12 = f + 1 - + f21 = f2 f22 = f2 + 1 - for op, op_str in [('gt','>'),('lt','<'),('ge','>='),('le','<='),('eq','=='),('ne','!=')]: + for op, op_str in [('gt', '>'), ('lt', '<'), ('ge', '>='), ('le', '<='), ('eq', '=='), ('ne', '!=')]: + op = getattr(operator, op) - op = getattr(operator,op) - - result = expr._can_use_numexpr(op, op_str, f11, f12, 'evaluate') + result = expr._can_use_numexpr(op, op_str, f11, f12, 'evaluate') self.assert_(result == (not f11._is_mixed_type)) - result = expr.evaluate(op, op_str, f11, f12, use_numexpr=True) + result = expr.evaluate(op, op_str, f11, f12, use_numexpr=True) expected = expr.evaluate(op, op_str, f11, f12, use_numexpr=False) - assert_array_equal(result,expected.values) - - result = expr._can_use_numexpr(op, op_str, f21, f22, 'evaluate') + assert_array_equal(result, expected.values) + + result = expr._can_use_numexpr(op, op_str, f21, f22, 'evaluate') self.assert_(result == False) expr.set_use_numexpr(False) @@ -177,16 +314,15 @@ def testit(): def test_where(self): def testit(): - for f in [ self.frame, self.frame2, self.mixed, self.mixed2 ]: + for f in [self.frame, self.frame2, self.mixed, self.mixed2]: - - for cond in [ True, False ]: - c = np.empty(f.shape,dtype=np.bool_) + for cond in [True, False]: + c = np.empty(f.shape, dtype=np.bool_) c.fill(cond) - result = expr.where(c, f.values, f.values+1) - expected = np.where(c, f.values, f.values+1) - assert_array_equal(result,expected) + result = expr.where(c, f.values, f.values + 1) + expected = np.where(c, f.values, f.values + 1) + assert_array_equal(result, expected) expr.set_use_numexpr(False) testit() @@ -196,8 +332,10 @@ def testit(): expr.set_numexpr_threads() testit() + if __name__ == '__main__': # unittest.main() import nose + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) From 0c455ba05df03ce504c97792543e5b4ab61fb748 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Wed, 26 Jun 2013 22:32:21 -0400 Subject: [PATCH 09/10] DOC: Document entire range of arithmetic methods in api.rst --- doc/source/api.rst | 66 +++++++++++++++++++++++++++++++++++++----- doc/source/release.rst | 3 ++ doc/source/v0.12.0.txt | 4 +++ 3 files changed, 65 insertions(+), 8 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 7e863a4429487..5e7f7cf1004be 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -248,12 +248,30 @@ Binary operator functions :toctree: generated/ Series.add - Series.div - Series.mul Series.sub + Series.mul + Series.div + Series.truediv + Series.floordiv + Series.mod + Series.pow + Series.radd + Series.rsub + Series.rmul + Series.rdiv + Series.rtruediv + Series.rfloordiv + Series.rmod + Series.rpow Series.combine Series.combine_first Series.round + Series.lt + Series.gt + Series.le + Series.ge + Series.ne + Series.eq Function application, GroupBy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -451,13 +469,27 @@ Binary operator functions :toctree: generated/ DataFrame.add - DataFrame.div - DataFrame.mul DataFrame.sub + DataFrame.mul + DataFrame.div + DataFrame.truediv + DataFrame.floordiv + DataFrame.mod + DataFrame.pow DataFrame.radd - DataFrame.rdiv - DataFrame.rmul DataFrame.rsub + DataFrame.rmul + DataFrame.rdiv + DataFrame.rtruediv + DataFrame.rfloordiv + DataFrame.rmod + DataFrame.rpow + DataFrame.lt + DataFrame.gt + DataFrame.le + DataFrame.ge + DataFrame.ne + DataFrame.eq DataFrame.combine DataFrame.combineAdd DataFrame.combine_first @@ -680,9 +712,27 @@ Binary operator functions :toctree: generated/ Panel.add - Panel.div - Panel.mul Panel.sub + Panel.mul + Panel.div + Panel.truediv + Panel.floordiv + Panel.mod + Panel.pow + Panel.radd + Panel.rsub + Panel.rmul + Panel.rdiv + Panel.rtruediv + Panel.rfloordiv + Panel.rmod + Panel.rpow + Panel.lt + Panel.gt + Panel.le + Panel.ge + Panel.ne + Panel.eq Function application, GroupBy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/release.rst b/doc/source/release.rst index 0fa7b4b2ed5f2..f459d05f278fe 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -54,6 +54,9 @@ pandas 0.12 - Access to historical Google Finance data in pandas.io.data (:issue:`3814`) - DataFrame plotting methods can sample column colors from a Matplotlib colormap via the ``colormap`` keyword. (:issue:`3860`) + - All non-Index NDFrames (``Series``, ``DataFrame``, ``Panel``, ``Panel4D``, + ``SparsePanel``, etc.), now support the entire set of arithmetic operators + and arithmetic flex methods (add, sub, mul, pow, etc.). (:issue:`3765`) **Improvements to existing features** diff --git a/doc/source/v0.12.0.txt b/doc/source/v0.12.0.txt index 4b100ed0b5fab..69a1c5372fecb 100644 --- a/doc/source/v0.12.0.txt +++ b/doc/source/v0.12.0.txt @@ -50,6 +50,10 @@ API changes p / p p / 0 + - All non-Index NDFrames (``Series``, ``DataFrame``, ``Panel``, ``Panel4D``, + ``SparsePanel``, etc.), now support the entire set of arithmetic operators + and arithmetic flex methods (add, sub, mul, pow, etc.). (:issue:`3765`) + - Add ``squeeze`` keyword to ``groupby`` to allow reduction from DataFrame -> Series if groups are unique. This is a Regression from 0.10.1. We are reverting back to the prior behavior. This means groupby will return the From 66259e76a1a08014e6384b7e4f9e5d9691fe0b60 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Thu, 27 Jun 2013 00:49:08 -0400 Subject: [PATCH 10/10] TST: Flesh out test cases for Series and Panel to include all arithmetic ops --- pandas/tests/test_expressions.py | 24 ++++++++---------- pandas/tests/test_frame.py | 8 +++++- pandas/tests/test_panel.py | 42 +++++++++++++++++++++++++------- pandas/tests/test_series.py | 8 +++--- 4 files changed, 55 insertions(+), 27 deletions(-) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 3bd5a83053ec8..0dabc073a6c5b 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -70,7 +70,8 @@ def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, operations.append('div') for arith in operations: if test_flex: - op = getattr(df, arith) + op = lambda x, y : getattr(df, arith)(y) + op.__name__ = arith else: op = getattr(operator, arith) expr.set_use_numexpr(False) @@ -105,11 +106,10 @@ def run_binary_test(self, df, other, assert_func, check_dtype=False, expr._MIN_ELEMENTS = 0 expr.set_test_mode(True) operations = ['gt', 'lt', 'ge', 'le', 'eq', 'ne'] - found_error = False - print "Running binary test with other - ", type(other) for arith in operations: if test_flex: - op = getattr(df, arith) + op = lambda x, y: getattr(df, arith)(y) + op.__name__ = arith else: op = getattr(operator, arith) expr.set_use_numexpr(False) @@ -129,13 +129,10 @@ def run_binary_test(self, df, other, assert_func, check_dtype=False, else: assert not used_numexpr, "Used numexpr unexpectedly." assert_func(expected, result) - except Exception as e: - print("-----------Failed test with func %r" % op) - print("-----------test_flex was %r" % test_flex) - print "-----------", e - found_error = True - if found_error: - raise AssertionError("found an error") + except Exception: + print("Failed test with operation %r" % arith) + print("test_flex was %r" % test_flex) + raise def run_frame(self, df, other, binary_comp=None, run_binary=True, **kwargs): self.run_arithmetic_test(df, other, assert_frame_equal, test_flex=False, @@ -173,9 +170,8 @@ def run_panel(self, panel, other, binary_comp=None, run_binary=True, binary_comp = other + 1 self.run_binary_test(panel, binary_comp, assert_func, test_flex=False, **kwargs) - # Panel doesn't support flex comparison methods... - # self.run_binary_test(panel, binary_comp, assert_panel_equal, - # test_flex=True, **kwargs) + self.run_binary_test(panel, binary_comp, assert_func, + test_flex=True, **kwargs) def test_integer_arithmetic_frame(self): self.run_frame(self.integer, self.integer) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 92d87b059885b..f94f098a822d1 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4166,7 +4166,7 @@ def test_first_last_valid(self): self.assert_(index == frame.index[-6]) def test_arith_flex_frame(self): - ops = ['add', 'sub', 'mul','div', 'truediv', 'pow', 'floordiv', 'mod'] + ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] if not py3compat.PY3: aliases = {} else: @@ -4180,6 +4180,12 @@ def test_arith_flex_frame(self): exp = f(self.frame, 2 * self.frame) assert_frame_equal(result, exp) + # rops + r_f = lambda x, y: f(y, x) + result = getattr(self.frame, 'r' + op)(2 * self.frame) + exp = r_f(self.frame, 2 * self.frame) + assert_frame_equal(result, exp) + # vs mix float result = getattr(self.mixed_float, op)(2 * self.mixed_float) exp = f(self.mixed_float, 2 * self.mixed_float) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index f615604dc353c..baf56c36094af 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -304,16 +304,13 @@ def check_op(op, name): assert_frame_equal(result.minor_xs(idx), op(self.panel.minor_xs(idx), xs)) - - check_op(operator.add, 'add') - check_op(operator.sub, 'subtract') - check_op(operator.mul, 'multiply') + ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow', 'mod'] + if not py3compat.PY3: + ops.append('div') + for op in ops: + check_op(getattr(operator, op), op) if py3compat.PY3: - check_op(operator.truediv, 'divide') - else: - check_op(operator.div, 'divide') - check_op(operator.truediv, 'truediv') - check_op(operator.floordiv, 'floordiv') + check_op(operator.floordiv, 'div') def test_combinePanel(self): result = self.panel.add(self.panel) @@ -1611,6 +1608,33 @@ def test_operators(self): result = (self.panel + 1).to_panel() assert_frame_equal(wp['ItemA'] + 1, result['ItemA']) + def test_arith_flex_panel(self): + ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] + if not py3compat.PY3: + aliases = {} + else: + aliases = {'div': 'truediv'} + self.panel = self.panel.to_panel() + n = np.random.randint(-50, 50) + for op in ops: + try: + alias = aliases.get(op, op) + f = getattr(operator, alias) + result = getattr(self.panel, op)(n) + exp = f(self.panel, n) + print result, exp + assert_panel_equal(result, exp, check_panel_type=True) + + # rops + r_f = lambda x, y: f(y, x) + result = getattr(self.panel, 'r' + op)(n) + exp = r_f(self.panel, n) + print result, exp + assert_panel_equal(result, exp) + except: + print("Failing operation %r" % op) + raise + def test_sort(self): def is_sorted(arr): return (arr[1:] > arr[:-1]).any() diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 5bf37c919f5cc..5320c7da5daa0 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -259,7 +259,6 @@ class SafeForSparse(object): _ts = tm.makeTimeSeries() - class TestSeries(unittest.TestCase, CheckNameIntegration): _multiprocess_can_split_ = True @@ -1703,7 +1702,9 @@ def _check_op(series, other, op, alt): tm.assert_almost_equal(result, expected) def check(series, other): - simple_ops = ['add', 'sub', 'mul'] + simple_ops = ['add', 'sub', 'mul', 'floordiv', 'truediv', 'pow'] + if not py3compat.PY3: + simple_ops.append('div') for opname in simple_ops: _check_op(series, other, getattr(Series, opname), @@ -1712,6 +1713,7 @@ def check(series, other): check(self.ts, self.ts * 2) check(self.ts, self.ts[::2]) check(self.ts, 5) + check(tm.makeFloatSeries(), tm.makeFloatSeries()) def test_neg(self): assert_series_equal(-self.series, -1 * self.series) @@ -2089,7 +2091,7 @@ def test_comparison_operators_with_nas(self): s = Series(bdate_range('1/1/2000', periods=10), dtype=object) s[::2] = np.nan - # test that comparions work + # test that comparisons work ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] for op in ops: val = s[5]