Skip to content

TST/PERF: Re-write assert_almost_equal() in cython #4398 #5219

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 21, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
compatible. (:issue:`5213`, :issue:`5214`)
- Unify ``dropna`` for Series/DataFrame signature (:issue:`5250`),
tests from :issue:`5234`, courtesy of @rockg
- Rewrite assert_almost_equal() in cython for performance (:issue:`4398`)

.. _release.bug_fixes-0.13.0:

Expand Down
142 changes: 142 additions & 0 deletions pandas/src/testing.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import numpy as np

from pandas import compat
from pandas.core.common import isnull

# Scalar types that is_comparable_as_number() accepts for numeric comparison.
# NumPy's own boolean scalar type is spelled ``np.bool_``; the legacy
# ``np.bool`` name was merely an alias of the builtin ``bool`` (already listed
# first) and is not available in every NumPy release.
cdef NUMERIC_TYPES = (
    bool,
    int,
    float,
    np.bool_,
    np.int8,
    np.int16,
    np.int32,
    np.int64,
    np.uint8,
    np.uint16,
    np.uint32,
    np.uint64,
    np.float16,
    np.float32,
    np.float64,
)

cdef bint is_comparable_as_number(obj):
    """Return True when ``obj`` is an instance of one of NUMERIC_TYPES."""
    cdef bint numeric = isinstance(obj, NUMERIC_TYPES)
    return numeric

cdef bint isiterable(obj):
    """Return True when ``obj`` exposes an ``__iter__`` attribute."""
    cdef bint iterable = hasattr(obj, '__iter__')
    return iterable

cdef bint has_length(obj):
    """Return True when ``obj`` exposes a ``__len__`` attribute."""
    cdef bint sized = hasattr(obj, '__len__')
    return sized

cdef bint is_dictlike(obj):
    """Return True when ``obj`` has both ``keys`` and ``__getitem__``."""
    # Guard clause: without ``keys`` the object cannot qualify.
    if not hasattr(obj, 'keys'):
        return False
    return hasattr(obj, '__getitem__')

cdef bint decimal_almost_equal(double desired, double actual, int decimal):
    """
    Return True when ``desired`` and ``actual`` differ by strictly less than
    ``0.5 * 10 ** -decimal``.
    """
    # Code from
    # http://docs.scipy.org/doc/numpy/reference/generated
    # /numpy.testing.assert_almost_equal.html
    cdef double tolerance = 0.5 * 10.0 ** -decimal
    cdef double delta = abs(desired - actual)
    return delta < tolerance

cpdef assert_dict_equal(a, b, bint compare_keys=True):
    """
    Assert that two dict-like objects hold almost-equal values.

    Both arguments must pass is_dictlike(). When ``compare_keys`` is true the
    two key sets must be identical. Every key of ``a`` is then looked up in
    both objects and the values are compared with assert_almost_equal().

    Returns True when no assertion fails; raises AssertionError otherwise.
    """
    both_dictlike = is_dictlike(a) and is_dictlike(b)
    assert both_dictlike, (
        "Cannot compare dict objects, one or both is not dict-like"
    )

    keys_of_a = frozenset(a.keys())
    keys_of_b = frozenset(b.keys())

    if compare_keys:
        assert keys_of_a == keys_of_b

    # Only the keys of ``a`` drive the value comparison.
    for key in keys_of_a:
        assert_almost_equal(a[key], b[key])

    return True

cpdef assert_almost_equal(a, b, bint check_less_precise=False):
    """
    Assert that ``a`` and ``b`` are equal, tolerating small numeric error.

    The comparison is chosen by the type of the operands, in this order:

    1. either is a ``dict``: delegated to assert_dict_equal()
       (``check_less_precise`` is not forwarded on this path);
    2. either is a string: exact ``==``;
    3. ``a`` is iterable: ``b`` must be iterable too, both must have a
       length, the lengths must match, and elements are compared pairwise
       by recursing into this function;
    4. either is null (per ``isnull``): both must be null;
    5. ``a`` is one of NUMERIC_TYPES: ``b`` must be as well, and the values
       are compared to ``decimal`` places (absolute difference when
       ``abs(a) < 1e-5``, otherwise the ratio ``b / a`` against 1);
    6. anything else: exact ``==``.

    Parameters
    ----------
    a, b : object
        The values to compare.
    check_less_precise : bint, default False
        When true and both operands are floating-point scalars of at most
        4 bytes, compare to 3 decimals instead of the default 5.

    Returns
    -------
    True when no assertion fails.

    Raises
    ------
    AssertionError
        When the operands are not (almost) equal.
    """
    cdef:
        int decimal
        Py_ssize_t i, na, nb
        double fa, fb

    if isinstance(a, dict) or isinstance(b, dict):
        return assert_dict_equal(a, b)

    if (isinstance(a, compat.string_types) or
            isinstance(b, compat.string_types)):
        assert a == b, "%r != %r" % (a, b)
        return True

    if isiterable(a):
        assert isiterable(b), (
            "First object is iterable, second isn't: %r != %r" % (a, b)
        )
        assert has_length(a) and has_length(b), (
            "Can't compare objects without length, one or both is invalid: "
            "(%r, %r)" % (a, b)
        )

        na, nb = len(a), len(b)
        assert na == nb, (
            "Length of two iterators not the same: %r != %r" % (na, nb)
        )
        # Fast path: two ndarrays that are exactly equal need no
        # element-by-element walk.
        if (isinstance(a, np.ndarray) and
                isinstance(b, np.ndarray) and
                np.array_equal(a, b)):
            return True

        for i in range(na):
            assert_almost_equal(a[i], b[i], check_less_precise)
        return True
    elif isiterable(b):
        assert False, (
            "Second object is iterable, first isn't: %r != %r" % (a, b)
        )

    if isnull(a):
        assert isnull(b), (
            "First object is null, second isn't: %r != %r" % (a, b)
        )
        return True
    elif isnull(b):
        # ``a`` is known not to be null here, so this assertion reports
        # the mismatch.
        assert isnull(a), (
            "First object is not null, second is null: %r != %r" % (a, b)
        )
        return True

    if is_comparable_as_number(a):
        assert is_comparable_as_number(b), (
            "First object is numeric, second is not: %r != %r" % (a, b)
        )

        decimal = 5

        # deal with differing dtypes
        if check_less_precise:
            dtype_a = np.dtype(type(a))
            dtype_b = np.dtype(type(b))
            # Compare the *kind* of both dtypes. Comparing a dtype object
            # to the string 'f' tests equality with float32 only, which
            # would skip the relaxed precision for e.g. float16.
            if dtype_a.kind == 'f' and dtype_b.kind == 'f':
                if dtype_a.itemsize <= 4 and dtype_b.itemsize <= 4:
                    decimal = 3

        if np.isinf(a):
            assert np.isinf(b), "First object is inf, second isn't"
            # np.isinf() is true for both +inf and -inf; the two
            # infinities must also carry the same sign.
            assert a == b, "%r != %r" % (a, b)
        else:
            fa, fb = a, b

            # case for zero
            if abs(fa) < 1e-5:
                if not decimal_almost_equal(fa, fb, decimal):
                    assert False, (
                        '(very low values) expected %.5f but got %.5f' % (b, a)
                    )
            else:
                # Relative comparison: the ratio must be almost 1.
                if not decimal_almost_equal(1, fb / fa, decimal):
                    assert False, 'expected %.5f but got %.5f' % (b, a)

    else:
        assert a == b, "%r != %r" % (a, b)

    return True
123 changes: 123 additions & 0 deletions pandas/tests/test_testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import unittest
import warnings
import nose
import numpy as np
import sys

from pandas.util.testing import (
assert_almost_equal, assertRaisesRegexp, raise_with_traceback
)

# let's get meta.

class TestAssertAlmostEqual(unittest.TestCase):
    """Exercise assert_almost_equal() with both argument orders."""
    _multiprocess_can_split_ = True

    def _assert_almost_equal_both(self, a, b, **kwargs):
        # The comparison must pass whichever operand comes first.
        assert_almost_equal(a, b, **kwargs)
        assert_almost_equal(b, a, **kwargs)

    def _assert_not_almost_equal_both(self, a, b, **kwargs):
        # The comparison must fail whichever operand comes first.
        self.assertRaises(AssertionError, assert_almost_equal, a, b, **kwargs)
        self.assertRaises(AssertionError, assert_almost_equal, b, a, **kwargs)

    def test_assert_almost_equal_numbers(self):
        self._assert_almost_equal_both(1.1, 1.1)
        self._assert_almost_equal_both(1.1, 1.100001)
        self._assert_almost_equal_both(np.int16(1), 1.000001)
        self._assert_almost_equal_both(np.float64(1.1), 1.1)
        self._assert_almost_equal_both(np.uint32(5), 5)

        self._assert_not_almost_equal_both(1.1, 1)
        self._assert_not_almost_equal_both(1.1, True)
        self._assert_not_almost_equal_both(1, 2)
        self._assert_not_almost_equal_both(1.0001, np.int16(1))

    def test_assert_almost_equal_numbers_with_zeros(self):
        self._assert_almost_equal_both(0, 0)
        self._assert_almost_equal_both(0.000001, 0)

        self._assert_not_almost_equal_both(0.001, 0)
        self._assert_not_almost_equal_both(1, 0)

    def test_assert_almost_equal_numbers_with_mixed(self):
        self._assert_not_almost_equal_both(1, 'abc')
        self._assert_not_almost_equal_both(1, [1,])
        self._assert_not_almost_equal_both(1, object())

    def test_assert_almost_equal_dicts(self):
        self._assert_almost_equal_both({'a': 1, 'b': 2}, {'a': 1, 'b': 2})

        self._assert_not_almost_equal_both({'a': 1, 'b': 2}, {'a': 1, 'b': 3})
        self._assert_not_almost_equal_both(
            {'a': 1, 'b': 2}, {'a': 1, 'b': 2, 'c': 3}
        )
        self._assert_not_almost_equal_both({'a': 1}, 1)
        self._assert_not_almost_equal_both({'a': 1}, 'abc')
        self._assert_not_almost_equal_both({'a': 1}, [1,])

    def test_assert_almost_equal_dict_like_object(self):
        # Minimal object offering only ``keys`` and ``__getitem__``.
        class DictLikeObj(object):
            def keys(self):
                return ('a',)

            def __getitem__(self, item):
                if item == 'a':
                    return 1

        self._assert_almost_equal_both({'a': 1}, DictLikeObj())

        self._assert_not_almost_equal_both({'a': 2}, DictLikeObj())

    def test_assert_almost_equal_strings(self):
        self._assert_almost_equal_both('abc', 'abc')

        self._assert_not_almost_equal_both('abc', 'abcd')
        self._assert_not_almost_equal_both('abc', 'abd')
        self._assert_not_almost_equal_both('abc', 1)
        self._assert_not_almost_equal_both('abc', [1,])

    def test_assert_almost_equal_iterables(self):
        self._assert_almost_equal_both([1, 2, 3], [1, 2, 3])
        self._assert_almost_equal_both(np.array([1, 2, 3]), [1, 2, 3])

        # Can't compare iterators: they have no length
        self._assert_not_almost_equal_both(iter([1, 2, 3]), [1, 2, 3])

        self._assert_not_almost_equal_both([1, 2, 3], [1, 2, 4])
        self._assert_not_almost_equal_both([1, 2, 3], [1, 2, 3, 4])
        self._assert_not_almost_equal_both([1, 2, 3], 1)

    def test_assert_almost_equal_null(self):
        # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias is not
        # available in every NumPy release.
        self._assert_almost_equal_both(None, None)
        self._assert_almost_equal_both(None, np.nan)

        self._assert_not_almost_equal_both(None, 0)
        self._assert_not_almost_equal_both(np.nan, 0)

    def test_assert_almost_equal_inf(self):
        self._assert_almost_equal_both(np.inf, np.inf)
        self._assert_almost_equal_both(np.inf, float("inf"))

        self._assert_not_almost_equal_both(np.inf, 0)

class TestUtilTesting(unittest.TestCase):
    """Tests for helper functions imported from pandas.util.testing."""
    _multiprocess_can_split_ = True

    def test_raise_with_traceback(self):
        """
        The LookupError handed to raise_with_traceback() must surface, both
        without and with an explicit traceback argument.
        """
        # Without an explicit traceback.
        with assertRaisesRegexp(LookupError, "error_text"):
            try:
                raise ValueError("THIS IS AN ERROR")
            except ValueError:
                raise_with_traceback(LookupError("error_text"))

        # With the traceback of the exception currently being handled.
        with assertRaisesRegexp(LookupError, "error_text"):
            try:
                raise ValueError("This is another error")
            except ValueError:
                replacement = LookupError("error_text")
                active_tb = sys.exc_info()[2]
                raise_with_traceback(replacement, active_tb)
35 changes: 0 additions & 35 deletions pandas/tests/test_tests.py

This file was deleted.

Loading