Skip to content

Commit 1023b79

Browse files
committed
Merge pull request #5219 from danbirken/c_assert_almost_equal_simple
TST/PERF: Re-write assert_almost_equal() in cython #4398
2 parents 7e3585d + 850220f commit 1023b79

File tree

6 files changed

+282
-102
lines changed

6 files changed

+282
-102
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
414414
compatible. (:issue:`5213`, :issue:`5214`)
415415
- Unify ``dropna`` for Series/DataFrame signature (:issue:`5250`),
416416
tests from :issue:`5234`, courtesy of @rockg
417+
- Rewrite assert_almost_equal() in cython for performance (:issue:`4398`)
417418

418419
.. _release.bug_fixes-0.13.0:
419420

pandas/src/testing.pyx

+142
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import numpy as np
2+
3+
from pandas import compat
4+
from pandas.core.common import isnull
5+
6+
# Scalar types that assert_almost_equal() compares numerically (with a
# tolerance) instead of with plain ``==``.
cdef NUMERIC_TYPES = (
    # Python built-in scalars
    bool, int, float,
    # NumPy boolean name
    # NOTE(review): what ``np.bool`` refers to depends on the NumPy version
    # in use -- confirm it is the intended type.
    np.bool,
    # NumPy signed integers
    np.int8, np.int16, np.int32, np.int64,
    # NumPy unsigned integers
    np.uint8, np.uint16, np.uint32, np.uint64,
    # NumPy floating point
    np.float16, np.float32, np.float64,
)
23+
24+
cdef bint is_comparable_as_number(obj):
    """Return whether ``obj`` is an instance of one of NUMERIC_TYPES."""
    cdef bint is_numeric = isinstance(obj, NUMERIC_TYPES)
    return is_numeric
26+
27+
cdef bint isiterable(obj):
    """Return whether ``obj`` exposes an ``__iter__`` attribute."""
    cdef bint exposes_iter = hasattr(obj, '__iter__')
    return exposes_iter
29+
30+
cdef bint has_length(obj):
    """Return whether ``obj`` exposes a ``__len__`` attribute."""
    cdef bint exposes_len = hasattr(obj, '__len__')
    return exposes_len
32+
33+
cdef bint is_dictlike(obj):
    """Return whether ``obj`` has both ``keys`` and ``__getitem__``."""
    # Guard clause: without ``keys`` the object cannot be dict-like, and the
    # second attribute is not looked up (same short-circuit as ``and``).
    if not hasattr(obj, 'keys'):
        return False
    return hasattr(obj, '__getitem__')
35+
36+
cdef bint decimal_almost_equal(double desired, double actual, int decimal):
    """Return whether two doubles agree to ``decimal`` decimal places.

    The values are considered equal when their absolute difference is
    strictly below ``0.5 * 10 ** -decimal``.
    """
    # Code from
    # http://docs.scipy.org/doc/numpy/reference/generated
    # /numpy.testing.assert_almost_equal.html
    cdef double tolerance = 0.5 * 10.0 ** -decimal
    cdef double difference = abs(desired - actual)
    return difference < tolerance
41+
42+
cpdef assert_dict_equal(a, b, bint compare_keys=True):
    """Assert that two dict-like objects hold almost-equal values.

    Parameters
    ----------
    a, b : dict-like
        Objects providing ``keys()`` and ``__getitem__``.
    compare_keys : bool, default True
        When true, the key sets of ``a`` and ``b`` must be identical.

    Returns
    -------
    True when every check passes.

    Raises
    ------
    AssertionError
        If either object is not dict-like, if the key sets differ (when
        ``compare_keys`` is set), or if any pair of values fails
        ``assert_almost_equal``.
    """
    assert is_dictlike(a) and is_dictlike(b), (
        "Cannot compare dict objects, one or both is not dict-like"
    )

    keys_of_a = frozenset(a.keys())
    keys_of_b = frozenset(b.keys())

    if compare_keys:
        assert keys_of_a == keys_of_b

    # Values are looked up by the keys of ``a`` only.
    for key in keys_of_a:
        assert_almost_equal(a[key], b[key])

    return True
57+
58+
cpdef assert_almost_equal(a, b, bint check_less_precise=False):
    """Assert that ``a`` and ``b`` are equal, tolerating small float error.

    The comparison strategy is picked from the operand types, in this order:

    1. If either operand is a ``dict``, delegate to ``assert_dict_equal``.
    2. If either operand is a string, require exact ``==`` equality.
    3. If ``a`` is iterable, ``b`` must be iterable too, both must have a
       length, the lengths must match, and the elements are compared
       pairwise by index (two ndarrays that are exactly equal short-circuit).
    4. If either operand is null (per ``isnull``), both must be null.
    5. If ``a`` is one of NUMERIC_TYPES, ``b`` must be as well and the two
       are compared to a number of decimal places.
    6. Anything else is compared with ``==``.

    Parameters
    ----------
    a, b : object
        The values to compare.
    check_less_precise : bool, default False
        When true and both operands are floating-point types of at most
        4 bytes, compare to 3 decimal places instead of 5.

    Returns
    -------
    True when the operands are considered equal.

    Raises
    ------
    AssertionError
        If the operands are not considered equal.
    """
    cdef:
        int decimal
        Py_ssize_t i, na, nb
        double fa, fb

    if isinstance(a, dict) or isinstance(b, dict):
        # assert_dict_equal() takes no precision flag, so
        # check_less_precise is not forwarded on this path.
        return assert_dict_equal(a, b)

    if (isinstance(a, compat.string_types) or
            isinstance(b, compat.string_types)):
        assert a == b, "%r != %r" % (a, b)
        return True

    if isiterable(a):
        assert isiterable(b), (
            "First object is iterable, second isn't: %r != %r" % (a, b)
        )
        assert has_length(a) and has_length(b), (
            "Can't compare objects without length, one or both is invalid: "
            "(%r, %r)" % (a, b)
        )

        na, nb = len(a), len(b)
        assert na == nb, (
            "Length of two iterators not the same: %r != %r" % (na, nb)
        )
        if (isinstance(a, np.ndarray) and
                isinstance(b, np.ndarray) and
                np.array_equal(a, b)):
            # Fast path: exactly equal arrays need no element-wise walk.
            return True
        else:
            for i in xrange(na):
                assert_almost_equal(a[i], b[i], check_less_precise)
        return True
    elif isiterable(b):
        assert False, (
            "Second object is iterable, first isn't: %r != %r" % (a, b)
        )

    if isnull(a):
        assert isnull(b), (
            "First object is null, second isn't: %r != %r" % (a, b)
        )
        return True
    elif isnull(b):
        # ``a`` is known not to be null here, so this assertion always fails
        # and reports the mismatch.
        assert isnull(a), (
            "First object is not null, second is null: %r != %r" % (a, b)
        )
        return True

    if is_comparable_as_number(a):
        assert is_comparable_as_number(b), (
            "First object is numeric, second is not: %r != %r" % (a, b)
        )

        decimal = 5

        # deal with differing dtypes
        if check_less_precise:
            dtype_a = np.dtype(type(a))
            dtype_b = np.dtype(type(b))
            # Compare the *kind* of both dtypes.  Comparing ``dtype_b``
            # itself against 'f' only matches float32, which left float16
            # operands at the strict precision.
            if dtype_a.kind == 'f' and dtype_b.kind == 'f':
                if dtype_a.itemsize <= 4 and dtype_b.itemsize <= 4:
                    decimal = 3

        if np.isinf(a):
            assert np.isinf(b), "First object is inf, second isn't"
            # Both are infinite; they must also share the same sign,
            # otherwise +inf would be accepted as equal to -inf.
            assert a == b, "%r != %r" % (a, b)
        else:
            fa, fb = a, b

            # case for zero
            if abs(fa) < 1e-5:
                # Near zero a ratio test is meaningless, so compare the
                # absolute difference instead.
                if not decimal_almost_equal(fa, fb, decimal):
                    assert False, (
                        '(very low values) expected %.5f but got %.5f' % (b, a)
                    )
            else:
                # Relative comparison: the ratio must be close to 1.
                if not decimal_almost_equal(1, fb / fa, decimal):
                    assert False, 'expected %.5f but got %.5f' % (b, a)

    else:
        assert a == b, "%r != %r" % (a, b)

    return True

pandas/tests/test_testing.py

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
import pandas as pd
4+
import unittest
5+
import warnings
6+
import nose
7+
import numpy as np
8+
import sys
9+
10+
from pandas.util.testing import (
11+
assert_almost_equal, assertRaisesRegexp, raise_with_traceback
12+
)
13+
14+
# let's get meta.
15+
16+
class TestAssertAlmostEqual(unittest.TestCase):
    """Checks for assert_almost_equal() across the input kinds it handles.

    Every case is run with the operands in both orders.
    """
    _multiprocess_can_split_ = True

    def _assert_almost_equal_both(self, a, b, **kwargs):
        """Require assert_almost_equal() to pass for (a, b) and (b, a)."""
        for left, right in ((a, b), (b, a)):
            assert_almost_equal(left, right, **kwargs)

    def _assert_not_almost_equal_both(self, a, b, **kwargs):
        """Require assert_almost_equal() to raise for (a, b) and (b, a)."""
        for left, right in ((a, b), (b, a)):
            self.assertRaises(
                AssertionError, assert_almost_equal, left, right, **kwargs)

    def test_assert_almost_equal_numbers(self):
        """Numeric scalars: builtin and NumPy types, equal and unequal."""
        close_pairs = [
            (1.1, 1.1),
            (1.1, 1.100001),
            (np.int16(1), 1.000001),
            (np.float64(1.1), 1.1),
            (np.uint32(5), 5),
        ]
        for left, right in close_pairs:
            self._assert_almost_equal_both(left, right)

        distinct_pairs = [
            (1.1, 1),
            (1.1, True),
            (1, 2),
            (1.0001, np.int16(1)),
        ]
        for left, right in distinct_pairs:
            self._assert_not_almost_equal_both(left, right)

    def test_assert_almost_equal_numbers_with_zeros(self):
        """Comparisons where one operand is zero."""
        for left, right in [(0, 0), (0.000001, 0)]:
            self._assert_almost_equal_both(left, right)

        for left, right in [(0.001, 0), (1, 0)]:
            self._assert_not_almost_equal_both(left, right)

    def test_assert_almost_equal_numbers_with_mixed(self):
        """A number never matches a string, a list or a plain object."""
        for other in ('abc', [1], object()):
            self._assert_not_almost_equal_both(1, other)

    def test_assert_almost_equal_dicts(self):
        """Dicts: equal contents, differing values/keys, non-dict operands."""
        reference = {'a': 1, 'b': 2}
        self._assert_almost_equal_both(reference, {'a': 1, 'b': 2})

        self._assert_not_almost_equal_both(reference, {'a': 1, 'b': 3})
        self._assert_not_almost_equal_both(
            reference, {'a': 1, 'b': 2, 'c': 3})
        for other in (1, 'abc', [1]):
            self._assert_not_almost_equal_both({'a': 1}, other)

    def test_assert_almost_equal_dict_like_object(self):
        """An object with keys() and __getitem__ is compared like a dict."""
        class DictLikeObj(object):
            def keys(self):
                return ('a',)

            def __getitem__(self, item):
                if item == 'a':
                    return 1

        self._assert_almost_equal_both({'a': 1}, DictLikeObj())

        self._assert_not_almost_equal_both({'a': 2}, DictLikeObj())

    def test_assert_almost_equal_strings(self):
        """Strings must match exactly and never match non-strings."""
        self._assert_almost_equal_both('abc', 'abc')

        for other in ('abcd', 'abd', 1, [1]):
            self._assert_not_almost_equal_both('abc', other)

    def test_assert_almost_equal_iterables(self):
        """Lists and arrays: element-wise comparison and length checks."""
        expected = [1, 2, 3]
        self._assert_almost_equal_both([1, 2, 3], expected)
        self._assert_almost_equal_both(np.array([1, 2, 3]), expected)

        # Can't compare generators
        self._assert_not_almost_equal_both(iter([1, 2, 3]), [1, 2, 3])

        for other in ([1, 2, 4], [1, 2, 3, 4], 1):
            self._assert_not_almost_equal_both([1, 2, 3], other)

    def test_assert_almost_equal_null(self):
        """None and NaN match each other but not zero."""
        for null_value in (None, np.NaN):
            self._assert_almost_equal_both(None, null_value)

        for null_value in (None, np.NaN):
            self._assert_not_almost_equal_both(null_value, 0)

    def test_assert_almost_equal_inf(self):
        """Infinity matches infinity but not a finite number."""
        for infinite in (np.inf, float("inf")):
            self._assert_almost_equal_both(np.inf, infinite)

        self._assert_not_almost_equal_both(np.inf, 0)
106+
107+
class TestUtilTesting(unittest.TestCase):
    """Checks for helpers imported from pandas.util.testing."""
    _multiprocess_can_split_ = True

    def test_raise_with_traceback(self):
        """raise_with_traceback() raises the exception it is handed.

        Covers the call with only the exception, and the call with an
        explicit traceback object taken from sys.exc_info().
        """
        # Form 1: only the replacement exception is passed.
        with assertRaisesRegexp(LookupError, "error_text"):
            try:
                raise ValueError("THIS IS AN ERROR")
            except ValueError:
                replacement = LookupError("error_text")
                raise_with_traceback(replacement)

        # Form 2: the active traceback is passed explicitly.
        with assertRaisesRegexp(LookupError, "error_text"):
            try:
                raise ValueError("This is another error")
            except ValueError:
                replacement = LookupError("error_text")
                active_traceback = sys.exc_info()[2]
                raise_with_traceback(replacement, active_traceback)

pandas/tests/test_tests.py

-35
This file was deleted.

0 commit comments

Comments
 (0)