pandas-dev · jreback · Oct 21, 2013 · Oct 21, 2013 · Oct 21, 2013 · Oct 21, 2013
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -414,6 +414,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
    compatible. (:issue:`5213`, :issue:`5214`)
  - Unity ``dropna`` for Series/DataFrame signature (:issue:`5250`),
    tests from :issue:`5234`, courtesy of @rockg
+ - Rewrite assert_almost_equal() in cython for performance (:issue:`4398`)
 
 .. _release.bug_fixes-0.13.0:
 

diff --git a/pandas/src/testing.pyx b/pandas/src/testing.pyx
@@ -0,0 +1,200 @@
+import numpy as np
+
+from pandas import compat
+from pandas.core.common import isnull
+
+# Scalar types that assert_almost_equal compares numerically
+# (with a tolerance) instead of with ``==``.
+cdef NUMERIC_TYPES = (
+    bool,
+    int,
+    float,
+    np.bool_,
+    np.int8,
+    np.int16,
+    np.int32,
+    np.int64,
+    np.uint8,
+    np.uint16,
+    np.uint32,
+    np.uint64,
+    np.float16,
+    np.float32,
+    np.float64,
+)
+
+cdef bint is_comparable_as_number(obj):
+    """Return True if ``obj`` is an instance of one of NUMERIC_TYPES."""
+    return isinstance(obj, NUMERIC_TYPES)
+
+cdef bint isiterable(obj):
+    """Return True if ``obj`` has an ``__iter__`` attribute."""
+    return hasattr(obj, '__iter__')
+
+cdef bint has_length(obj):
+    """Return True if ``obj`` has a ``__len__`` attribute."""
+    return hasattr(obj, '__len__')
+
+cdef bint is_dictlike(obj):
+    """Return True if ``obj`` has both ``keys`` and ``__getitem__``."""
+    return hasattr(obj, 'keys') and hasattr(obj, '__getitem__')
+
+cdef bint decimal_almost_equal(double desired, double actual, int decimal):
+    """Return True if the values agree to ``decimal`` decimal places.
+
+    That is, ``abs(desired - actual) < 0.5 * 10 ** -decimal``.
+    """
+    # Code from
+    # http://docs.scipy.org/doc/numpy/reference/generated
+    # /numpy.testing.assert_almost_equal.html
+    return abs(desired - actual) < (0.5 * 10.0 ** -decimal)
+
+cpdef assert_dict_equal(a, b, bint compare_keys=True):
+    """Assert that two dict-like objects hold almost-equal values.
+
+    Parameters
+    ----------
+    a, b : dict-like
+        Objects exposing ``keys`` and ``__getitem__``.
+    compare_keys : bool, default True
+        If True, also assert that both objects have the same set of keys.
+
+    Returns
+    -------
+    True
+        When every check passes.
+
+    Raises
+    ------
+    AssertionError
+        If either object is not dict-like, the key sets differ (with
+        ``compare_keys``), or a pair of values is not almost equal.
+    """
+    assert is_dictlike(a) and is_dictlike(b), (
+        "Cannot compare dict objects, one or both is not dict-like"
+    )
+
+    a_keys = frozenset(a.keys())
+    b_keys = frozenset(b.keys())
+
+    if compare_keys:
+        assert a_keys == b_keys
+
+    # values are looked up by the keys of ``a`` in both objects
+    for k in a_keys:
+        assert_almost_equal(a[k], b[k])
+
+    return True
+
+cpdef assert_almost_equal(a, b, bint check_less_precise=False):
+    """Assert that ``a`` and ``b`` are equal, allowing float round-off.
+
+    Dicts are compared by key, strings with ``==``, other iterables
+    element by element, nulls (per ``isnull``) only against nulls, numbers
+    to a fixed number of decimal places and anything else with ``==``.
+
+    Parameters
+    ----------
+    a, b : object
+        The objects to compare.
+    check_less_precise : bool, default False
+        Numbers are compared to 5 decimal places. If True and both
+        numbers are floats of at most 4 bytes, only 3 places are used.
+
+    Returns
+    -------
+    True
+        When every check passes.
+
+    Raises
+    ------
+    AssertionError
+        If the two objects are not almost equal.
+    """
+    cdef:
+        int decimal
+        Py_ssize_t i, na, nb
+        double fa, fb
+
+    if isinstance(a, dict) or isinstance(b, dict):
+        return assert_dict_equal(a, b)
+
+    if (isinstance(a, compat.string_types) or
+            isinstance(b, compat.string_types)):
+        assert a == b, "%r != %r" % (a, b)
+        return True
+
+    if isiterable(a):
+        assert isiterable(b), (
+            "First object is iterable, second isn't: %r != %r" % (a, b)
+        )
+        assert has_length(a) and has_length(b), (
+            "Can't compare objects without length, one or both is invalid: "
+            "(%r, %r)" % (a, b)
+        )
+
+        na, nb = len(a), len(b)
+        assert na == nb, (
+            "Length of two iterators not the same: %r != %r" % (na, nb)
+        )
+        # fast path: two exactly equal ndarrays need no element-wise walk
+        if (isinstance(a, np.ndarray) and
+                isinstance(b, np.ndarray) and
+                np.array_equal(a, b)):
+            return True
+        else:
+            for i in xrange(na):
+                assert_almost_equal(a[i], b[i], check_less_precise)
+        return True
+    elif isiterable(b):
+        assert False, (
+            "Second object is iterable, first isn't: %r != %r" % (a, b)
+        )
+
+    if isnull(a):
+        assert isnull(b), (
+            "First object is null, second isn't: %r != %r" % (a, b)
+        )
+        return True
+    elif isnull(b):
+        assert isnull(a), (
+            "First object is not null, second is null: %r != %r" % (a, b)
+        )
+        return True
+
+    if is_comparable_as_number(a):
+        assert is_comparable_as_number(b), (
+            "First object is numeric, second is not: %r != %r" % (a, b)
+        )
+
+        decimal = 5
+
+        # deal with differing dtypes
+        if check_less_precise:
+            dtype_a = np.dtype(type(a))
+            dtype_b = np.dtype(type(b))
+            if dtype_a.kind == 'f' and dtype_b.kind == 'f':
+                if dtype_a.itemsize <= 4 and dtype_b.itemsize <= 4:
+                    decimal = 3
+
+        if np.isinf(a):
+            assert np.isinf(b), "First object is inf, second isn't"
+            # +inf and -inf both pass isinf() but are not equal
+            assert a == b, "Infinities differ in sign: %r != %r" % (a, b)
+        else:
+            fa, fb = a, b
+
+            # case for zero
+            if abs(fa) < 1e-5:
+                if not decimal_almost_equal(fa, fb, decimal):
+                    assert False, (
+                        '(very low values) expected %.5f but got %.5f' % (b, a)
+                    )
+            else:
+                if not decimal_almost_equal(1, fb / fa, decimal):
+                    assert False, 'expected %.5f but got %.5f' % (b, a)
+
+    else:
+        assert a == b, "%r != %r" % (a, b)
+
+    return True
diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+import pandas as pd
+import unittest
+import warnings
+import nose
+import numpy as np
+import sys
+
+from pandas.util.testing import (
+    assert_almost_equal, assertRaisesRegexp, raise_with_traceback
+)
+
+# let's get meta: these are tests for the testing helpers themselves.
+
+class TestAssertAlmostEqual(unittest.TestCase):
+    """Tests for ``assert_almost_equal``, checked in both argument orders."""
+    _multiprocess_can_split_ = True
+
+    def _assert_almost_equal_both(self, a, b, **kwargs):
+        """Check that (a, b) and (b, a) both pass ``assert_almost_equal``."""
+        assert_almost_equal(a, b, **kwargs)
+        assert_almost_equal(b, a, **kwargs)
+
+    def _assert_not_almost_equal_both(self, a, b, **kwargs):
+        """Check that (a, b) and (b, a) both raise AssertionError."""
+        self.assertRaises(AssertionError, assert_almost_equal, a, b, **kwargs)
+        self.assertRaises(AssertionError, assert_almost_equal, b, a, **kwargs)
+
+    def test_assert_almost_equal_numbers(self):
+        self._assert_almost_equal_both(1.1, 1.1)
+        self._assert_almost_equal_both(1.1, 1.100001)
+        self._assert_almost_equal_both(np.int16(1), 1.000001)
+        self._assert_almost_equal_both(np.float64(1.1), 1.1)
+        self._assert_almost_equal_both(np.uint32(5), 5)
+
+        self._assert_not_almost_equal_both(1.1, 1)
+        self._assert_not_almost_equal_both(1.1, True)
+        self._assert_not_almost_equal_both(1, 2)
+        self._assert_not_almost_equal_both(1.0001, np.int16(1))
+
+    def test_assert_almost_equal_numbers_with_zeros(self):
+        self._assert_almost_equal_both(0, 0)
+        self._assert_almost_equal_both(0.000001, 0)
+
+        self._assert_not_almost_equal_both(0.001, 0)
+        self._assert_not_almost_equal_both(1, 0)
+
+    def test_assert_almost_equal_numbers_with_mixed(self):
+        self._assert_not_almost_equal_both(1, 'abc')
+        self._assert_not_almost_equal_both(1, [1,])
+        self._assert_not_almost_equal_both(1, object())
+
+    def test_assert_almost_equal_dicts(self):
+        self._assert_almost_equal_both({'a': 1, 'b': 2}, {'a': 1, 'b': 2})
+
+        self._assert_not_almost_equal_both({'a': 1, 'b': 2}, {'a': 1, 'b': 3})
+        self._assert_not_almost_equal_both(
+            {'a': 1, 'b': 2}, {'a': 1, 'b': 2, 'c': 3}
+        )
+        self._assert_not_almost_equal_both({'a': 1}, 1)
+        self._assert_not_almost_equal_both({'a': 1}, 'abc')
+        self._assert_not_almost_equal_both({'a': 1}, [1,])
+
+    def test_assert_almost_equal_dict_like_object(self):
+        class DictLikeObj(object):
+            def keys(self):
+                return ('a',)
+
+            def __getitem__(self, item):
+                if item == 'a':
+                    return 1
+
+        self._assert_almost_equal_both({'a': 1}, DictLikeObj())
+
+        self._assert_not_almost_equal_both({'a': 2}, DictLikeObj())
+
+    def test_assert_almost_equal_strings(self):
+        self._assert_almost_equal_both('abc', 'abc')
+
+        self._assert_not_almost_equal_both('abc', 'abcd')
+        self._assert_not_almost_equal_both('abc', 'abd')
+        self._assert_not_almost_equal_both('abc', 1)
+        self._assert_not_almost_equal_both('abc', [1,])
+
+    def test_assert_almost_equal_iterables(self):
+        self._assert_almost_equal_both([1, 2, 3], [1, 2, 3])
+        self._assert_almost_equal_both(np.array([1, 2, 3]), [1, 2, 3])
+
+        # Can't compare iterators that have no length (e.g. generators)
+        self._assert_not_almost_equal_both(iter([1, 2, 3]), [1, 2, 3])
+
+        self._assert_not_almost_equal_both([1, 2, 3], [1, 2, 4])
+        self._assert_not_almost_equal_both([1, 2, 3], [1, 2, 3, 4])
+        self._assert_not_almost_equal_both([1, 2, 3], 1)
+
+    def test_assert_almost_equal_null(self):
+        self._assert_almost_equal_both(None, None)
+        self._assert_almost_equal_both(None, np.nan)
+
+        self._assert_not_almost_equal_both(None, 0)
+        self._assert_not_almost_equal_both(np.nan, 0)
+
+    def test_assert_almost_equal_inf(self):
+        self._assert_almost_equal_both(np.inf, np.inf)
+        self._assert_almost_equal_both(np.inf, float("inf"))
+
+        self._assert_not_almost_equal_both(np.inf, 0)
+
+class TestUtilTesting(unittest.TestCase):
+    """Tests for the ``raise_with_traceback`` helper."""
+    _multiprocess_can_split_ = True
+
+    def test_raise_with_traceback(self):
+        # called without an explicit traceback argument
+        with assertRaisesRegexp(LookupError, "error_text"):
+            try:
+                raise ValueError("THIS IS AN ERROR")
+            except ValueError as e:
+                e = LookupError("error_text")
+                raise_with_traceback(e)
+        # called with the traceback from sys.exc_info() passed explicitly
+        with assertRaisesRegexp(LookupError, "error_text"):
+            try:
+                raise ValueError("This is another error")
+            except ValueError:
+                e = LookupError("error_text")
+                _, _, traceback = sys.exc_info()
+                raise_with_traceback(e, traceback)
diff --git a/pandas/tests/test_tests.py b/pandas/tests/test_tests.py