Merge pull request #5439 from jtratner/make-div-do-truediv

jtratner · jtratner · commit 05cb9609ddfc · 2013-11-05T14:53:09.000-08:00
ENH: Always do true division on Python 2.X
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -376,6 +376,25 @@ API Changes
     dates are given (:issue:`5242`)
   - ``Timestamp`` now supports ``now/today/utcnow`` class methods
     (:issue:`5339`)
+  - **All** division with ``NDFrame``-like objects is now true division,
+    regardless of the ``from __future__ import division`` import. You can use
+    ``//`` and ``floordiv`` to do integer division.
+
+  .. code-block:: python
+    In [3]: arr = np.array([1, 2, 3, 4])
+
+    In [4]: arr2 = np.array([5, 3, 2, 1])
+
+    In [5]: arr / arr2
+    Out[5]: array([0, 0, 1, 4])
+
+    In [6]: pd.Series(arr) / pd.Series(arr2) # no future import required
+    Out[6]:
+    0    0.200000
+    1    0.666667
+    2    1.500000
+    3    4.000000
+    dtype: float64
 
 Internal Refactoring
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
@@ -55,6 +55,26 @@ API changes
       # and all methods take an inplace kwarg - but returns None
       index.set_names(["bob", "cranberry"], inplace=True)
 
+- **All** division with ``NDFrame``-like objects is now true division,
+  regardless of the ``from __future__ import division`` import. You can use
+  ``//`` and ``floordiv`` to do integer division.
+
+  .. code-block:: python
+      In [3]: arr = np.array([1, 2, 3, 4])
+
+      In [4]: arr2 = np.array([5, 3, 2, 1])
+
+      In [5]: arr / arr2
+      Out[5]: array([0, 0, 1, 4])
+
+      In [6]: pd.Series(arr) / pd.Series(arr2) # no future import required
+      Out[6]:
+      0    0.200000
+      1    0.666667
+      2    1.500000
+      3    4.000000
+      dtype: float64
+
 - Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`)
 - ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`)
   behavior. See :ref:`gotchas<gotchas.truth>` for a more detailed discussion.
diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py
@@ -61,6 +61,7 @@ def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs):
         _store_test_result(False)
     return op(a, b)
 
+
 def _can_use_numexpr(op, op_str, a, b, dtype_check):
     """ return a boolean if we WILL be using numexpr """
     if op_str is not None:
@@ -86,7 +87,8 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check):
     return False
 
 
-def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, **eval_kwargs):
+def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True,
+                      **eval_kwargs):
     result = None
 
     if _can_use_numexpr(op, op_str, a, b, 'evaluate'):
@@ -96,7 +98,8 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, **eval_kwargs):
             result = ne.evaluate('a_value %s b_value' % op_str,
                                  local_dict={'a_value': a_value,
                                              'b_value': b_value},
-                                 casting='safe', **eval_kwargs)
+                                 casting='safe', truediv=truediv,
+                                 **eval_kwargs)
         except (ValueError) as detail:
             if 'unknown type object' in str(detail):
                 pass
@@ -112,10 +115,12 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, **eval_kwargs):
 
     return result
 
+
 def _where_standard(cond, a, b, raise_on_error=True):
     return np.where(_values_from_object(cond), _values_from_object(a),
                     _values_from_object(b))
 
+
 def _where_numexpr(cond, a, b, raise_on_error=False):
     result = None
 
@@ -190,10 +195,10 @@ def where(cond, a, b, raise_on_error=False, use_numexpr=True):
     return _where_standard(cond, a, b, raise_on_error=raise_on_error)
 
 
-def set_test_mode(v = True):
+def set_test_mode(v=True):
     """
-    Keeps track of whether numexpr  was used.  Stores an additional ``True`` for
-    every successful use of evaluate with numexpr since the last
+    Keeps track of whether numexpr  was used.  Stores an additional ``True``
+    for every successful use of evaluate with numexpr since the last
     ``get_test_result``
     """
     global _TEST_MODE, _TEST_RESULT
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -2,7 +2,7 @@
 Generic data algorithms. This module is experimental at the moment and not
 intended for public consumption
 """
-
+from __future__ import division
 from warnings import warn
 import numpy as np
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -8,7 +8,7 @@
 alignment and a host of useful data manipulation methods having to do with the
 labeling information
 """
-
+from __future__ import division
 # pylint: disable=E1101,E1103
 # pylint: disable=W0212,W0231,W0703,W0622
 
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -3,6 +3,8 @@
 
 This is not a public API.
 """
+# necessary to enforce truediv in Python 2.X
+from __future__ import division
 import operator
 import numpy as np
 import pandas as pd
@@ -80,24 +82,10 @@ def names(x):
         rmod=arith_method(lambda x, y: y % x, names('rmod'),
                           default_axis=default_axis),
     )
-    if not compat.PY3:
-        new_methods["div"] = arith_method(operator.div, names('div'), op('/'),
-                                          truediv=False, fill_zeros=np.inf,
-                                          default_axis=default_axis)
-        new_methods["rdiv"] = arith_method(lambda x, y: operator.div(y, x),
-                                           names('rdiv'), truediv=False,
-                                           fill_zeros=np.inf,
-                                           default_axis=default_axis)
-    else:
-        new_methods["div"] = arith_method(operator.truediv, names('div'),
-                                          op('/'), truediv=True,
-                                          fill_zeros=np.inf,
-                                          default_axis=default_axis)
-        new_methods["rdiv"] = arith_method(lambda x, y: operator.truediv(y, x),
-                                           names('rdiv'), truediv=False,
-                                           fill_zeros=np.inf,
-                                           default_axis=default_axis)
-        # Comp methods never had a default axis set
+    new_methods['div'] = new_methods['truediv']
+    new_methods['rdiv'] = new_methods['rtruediv']
+
+    # Comp methods never had a default axis set
     if comp_method:
         new_methods.update(dict(
             eq=comp_method(operator.eq, names('eq'), op('==')),
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
@@ -2,7 +2,7 @@
 Contains data structures designed for manipulating panel (3-dimensional) data
 """
 # pylint: disable=E1103,W0231,W0212,W0621
-
+from __future__ import division
 from pandas.compat import map, zip, range, lrange, lmap, u, OrderedDict, OrderedDefaultdict
 from pandas import compat
 import sys
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1,6 +1,7 @@
 """
 Data structure for 1-dimensional cross-sectional and time series data
 """
+from __future__ import division
 
 # pylint: disable=E1101,E1103
 # pylint: disable=W0703,W0622,W0613,W0201
diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py
@@ -1,7 +1,7 @@
 """
 SparseArray data structure
 """
-
+from __future__ import division
 # pylint: disable=E1101,E1103,W0231
 
 from numpy import nan, ndarray
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
@@ -2,7 +2,7 @@
 Data structures for sparse float data. Life is made simpler by dealing only
 with float64 data
 """
-
+from __future__ import division
 # pylint: disable=E1101,E1103,W0231,E0202
 
 from numpy import nan
diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
@@ -72,23 +72,21 @@ def run_arithmetic_test(self, df, other, assert_func, check_dtype=False,
         if not compat.PY3:
             operations.append('div')
         for arith in operations:
-            if test_flex:
-                op = getattr(df, arith)
-            else:
-                op = getattr(operator, arith)
+            operator_name = arith
+            if arith == 'div':
+                operator_name = 'truediv'
+
             if test_flex:
                 op = lambda x, y: getattr(df, arith)(y)
                 op.__name__ = arith
             else:
-                op = getattr(operator, arith)
+                op = getattr(operator, operator_name)
             expr.set_use_numexpr(False)
             expected = op(df, other)
             expr.set_use_numexpr(True)
             result = op(df, other)
             try:
                 if check_dtype:
-                    if arith == 'div':
-                        assert expected.dtype.kind == df.dtype.kind
                     if arith == 'truediv':
                         assert expected.dtype.kind == 'f'
                 assert_func(expected, result)
@@ -103,7 +101,7 @@ def test_integer_arithmetic(self):
                                  assert_series_equal, check_dtype=True)
 
     @nose.tools.nottest
-    def run_binary_test(self, df, other, assert_func, check_dtype=False,
+    def run_binary_test(self, df, other, assert_func,
                         test_flex=False, numexpr_ops=set(['gt', 'lt', 'ge',
                                                           'le', 'eq', 'ne'])):
         """
@@ -127,11 +125,6 @@ def run_binary_test(self, df, other, assert_func, check_dtype=False,
             result = op(df, other)
             used_numexpr = expr.get_test_result()
             try:
-                if check_dtype:
-                    if arith == 'div':
-                        assert expected.dtype.kind == result.dtype.kind
-                    if arith == 'truediv':
-                        assert result.dtype.kind == 'f'
                 if arith in numexpr_ops:
                     assert used_numexpr, "Did not use numexpr as expected."
                 else:
@@ -267,8 +260,10 @@ def testit():
             for f, f2 in [ (self.frame, self.frame2), (self.mixed, self.mixed2) ]:
 
                 for op, op_str in [('add','+'),('sub','-'),('mul','*'),('div','/'),('pow','**')]:
-
-                    op = getattr(operator,op,None)
+                    if op == 'div':
+                        op = getattr(operator, 'truediv', None)
+                    else:
+                        op = getattr(operator, op, None)
                     if op is not None:
                         result   = expr._can_use_numexpr(op, op_str, f, f, 'evaluate')
                         self.assert_(result == (not f._is_mixed_type))
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -2032,7 +2032,12 @@ def check(series, other, check_reverse=False):
 
             for opname in simple_ops:
                 op = getattr(Series, opname)
-                alt = getattr(operator, opname)
+
+                if op == 'div':
+                    alt = operator.truediv
+                else:
+                    alt = getattr(operator, opname)
+
                 result = op(series, other)
                 expected = alt(series, other)
                 tm.assert_almost_equal(result, expected)
@@ -2079,11 +2084,11 @@ def test_modulo(self):
 
     def test_div(self):
 
-        # integer div, but deal with the 0's
+        # no longer do integer div for any ops, but deal with the 0's
         p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
         result = p['first'] / p['second']
         expected = Series(
-            p['first'].values / p['second'].values, dtype='float64')
+            p['first'].values.astype(float) / p['second'].values, dtype='float64')
         expected.iloc[0:3] = np.inf
         assert_series_equal(result, expected)
 
@@ -2098,10 +2103,7 @@ def test_div(self):
 
         p = DataFrame({'first': [3, 4, 5, 8], 'second': [1, 1, 1, 1]})
         result = p['first'] / p['second']
-        if compat.PY3:
-            assert_series_equal(result, p['first'].astype('float64'))
-        else:
-            assert_series_equal(result, p['first'])
+        assert_series_equal(result, p['first'].astype('float64'))
         self.assertFalse(np.array_equal(result, p['second'] / p['first']))
 
     def test_operators(self):