Skip to content

Commit da14c6e

Browse files
committed
Merge pull request #3764 from jtratner/fix_division_with_numexpr
BUG: Fix __truediv__ numexpr error
2 parents fad50af + 0e7781c commit da14c6e

File tree

4 files changed

+69
-11
lines changed

4 files changed

+69
-11
lines changed

RELEASE.rst

+3
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ pandas 0.11.1
250250
not converting dtypes (GH3911_)
251251
- Fixed a bug where ``DataFrame.replace`` with a compiled regular expression
252252
in the ``to_replace`` argument wasn't working (GH3907_)
253+
- Fixed ``__truediv__`` in Python 2.7 with ``numexpr`` installed to actually do true division when dividing
254+
two integer arrays with at least 10000 cells total (GH3764_)
253255

254256
.. _GH3164: https://github.com/pydata/pandas/issues/3164
255257
.. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -351,6 +353,7 @@ pandas 0.11.1
351353
.. _GH3907: https://github.com/pydata/pandas/issues/3907
352354
.. _GH3911: https://github.com/pydata/pandas/issues/3911
353355
.. _GH3912: https://github.com/pydata/pandas/issues/3912
356+
.. _GH3764: https://github.com/pydata/pandas/issues/3764
354357

355358
pandas 0.11.0
356359
=============

pandas/core/expressions.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def set_numexpr_threads(n = None):
5151
pass
5252

5353

54-
def _evaluate_standard(op, op_str, a, b, raise_on_error=True):
54+
def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs):
5555
""" standard evaluation """
5656
return op(a,b)
5757

@@ -79,7 +79,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check):
7979

8080
return False
8181

82-
def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False):
82+
def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs):
8383
result = None
8484

8585
if _can_use_numexpr(op, op_str, a, b, 'evaluate'):
@@ -92,7 +92,7 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False):
9292
result = ne.evaluate('a_value %s b_value' % op_str,
9393
local_dict={ 'a_value' : a_value,
9494
'b_value' : b_value },
95-
casting='safe')
95+
casting='safe', **eval_kwargs)
9696
except (ValueError), detail:
9797
if 'unknown type object' in str(detail):
9898
pass
@@ -142,7 +142,7 @@ def _where_numexpr(cond, a, b, raise_on_error = False):
142142
# turn myself on
143143
set_use_numexpr(True)
144144

145-
def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True):
145+
def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kwargs):
146146
""" evaluate and return the expression of the op on a and b
147147
148148
Parameters
@@ -158,7 +158,7 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True):
158158
"""
159159

160160
if use_numexpr:
161-
return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error)
161+
return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, **eval_kwargs)
162162
return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error)
163163

164164
def where(cond, a, b, raise_on_error=False, use_numexpr=True):

pandas/core/frame.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,10 @@ class DataConflictError(Exception):
190190
# Factory helper methods
191191

192192

193-
def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None):
193+
def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None, **eval_kwargs):
194194
def na_op(x, y):
195195
try:
196-
result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True)
196+
result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs)
197197
result = com._fill_zeros(result,y,fill_zeros)
198198

199199
except TypeError:
@@ -853,12 +853,17 @@ def __contains__(self, key):
853853
__sub__ = _arith_method(operator.sub, '__sub__', '-', default_axis=None)
854854
__mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None)
855855
__truediv__ = _arith_method(operator.truediv, '__truediv__', '/',
856-
default_axis=None, fill_zeros=np.inf)
856+
default_axis=None, fill_zeros=np.inf, truediv=True)
857+
# numexpr produces a different value (python/numpy: 0.000, numexpr: inf)
858+
# when dividing by zero, so we can't use the numexpr floordiv speed-up (yet)
859+
# __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', '//',
857860
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__',
858861
default_axis=None, fill_zeros=np.inf)
859862
__pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None)
860863

861-
__mod__ = _arith_method(operator.mod, '__mod__', '*', default_axis=None, fill_zeros=np.nan)
864+
# passing the '%' string form currently causes a floating point exception to occur - so sticking with the unaccelerated version for now
865+
# __mod__ = _arith_method(operator.mod, '__mod__', '%', default_axis=None, fill_zeros=np.nan)
866+
__mod__ = _arith_method(operator.mod, '__mod__', default_axis=None, fill_zeros=np.nan)
862867

863868
__radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None)
864869
__rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None)
@@ -879,7 +884,7 @@ def __contains__(self, key):
879884
# Python 2 division methods
880885
if not py3compat.PY3:
881886
__div__ = _arith_method(operator.div, '__div__', '/',
882-
default_axis=None, fill_zeros=np.inf)
887+
default_axis=None, fill_zeros=np.inf, truediv=False)
883888
__rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__',
884889
default_axis=None, fill_zeros=np.inf)
885890

pandas/tests/test_expressions.py

+51-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
_frame2 = DataFrame(np.random.randn(100, 4), columns = list('ABCD'), dtype='float64')
3131
_mixed = DataFrame({ 'A' : _frame['A'].copy(), 'B' : _frame['B'].astype('float32'), 'C' : _frame['C'].astype('int64'), 'D' : _frame['D'].astype('int32') })
3232
_mixed2 = DataFrame({ 'A' : _frame2['A'].copy(), 'B' : _frame2['B'].astype('float32'), 'C' : _frame2['C'].astype('int64'), 'D' : _frame2['D'].astype('int32') })
33+
_integer = DataFrame(np.random.randint(1, 100, size=(10001, 4)), columns = list('ABCD'), dtype='int64')
3334

3435
class TestExpressions(unittest.TestCase):
3536

@@ -41,7 +42,56 @@ def setUp(self):
4142
self.frame2 = _frame2.copy()
4243
self.mixed = _mixed.copy()
4344
self.mixed2 = _mixed2.copy()
44-
45+
self.integer = _integer.copy()
46+
self._MIN_ELEMENTS = expr._MIN_ELEMENTS
47+
48+
def tearDown(self):
49+
expr._MIN_ELEMENTS = self._MIN_ELEMENTS
50+
51+
#TODO: add test for Panel
52+
#TODO: add tests for binary operations
53+
@nose.tools.nottest
54+
def run_arithmetic_test(self, df, assert_func, check_dtype=False):
55+
expr._MIN_ELEMENTS = 0
56+
operations = ['add', 'sub', 'mul','mod','truediv','floordiv','pow']
57+
if not py3compat.PY3:
58+
operations.append('div')
59+
for arith in operations:
60+
op = getattr(operator, arith)
61+
expr.set_use_numexpr(False)
62+
expected = op(df, df)
63+
expr.set_use_numexpr(True)
64+
result = op(df, df)
65+
try:
66+
if check_dtype:
67+
if arith == 'div':
68+
assert expected.dtype.kind == df.dtype.kind
69+
if arith == 'truediv':
70+
assert expected.dtype.kind == 'f'
71+
assert_func(expected, result)
72+
except Exception:
73+
print("Failed test with operator %r" % op.__name__)
74+
raise
75+
76+
def test_integer_arithmetic(self):
77+
self.run_arithmetic_test(self.integer, assert_frame_equal)
78+
self.run_arithmetic_test(self.integer.icol(0), assert_series_equal,
79+
check_dtype=True)
80+
81+
def test_float_arithemtic(self):
82+
self.run_arithmetic_test(self.frame, assert_frame_equal)
83+
self.run_arithmetic_test(self.frame.icol(0), assert_series_equal,
84+
check_dtype=True)
85+
86+
def test_mixed_arithmetic(self):
87+
self.run_arithmetic_test(self.mixed, assert_frame_equal)
88+
for col in self.mixed.columns:
89+
self.run_arithmetic_test(self.mixed[col], assert_series_equal)
90+
91+
def test_integer_with_zeros(self):
92+
self.integer *= np.random.randint(0, 2, size=np.shape(self.integer))
93+
self.run_arithmetic_test(self.integer, assert_frame_equal)
94+
self.run_arithmetic_test(self.integer.icol(0), assert_series_equal)
4595

4696
def test_invalid(self):
4797

0 commit comments

Comments
 (0)