From 4ed857e1616d407d5223180d0130b6d982b4ef85 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Fri, 26 Jun 2015 04:54:37 +0900 Subject: [PATCH] TST: make assertion messages more understandable --- pandas/io/tests/test_json/test_pandas.py | 28 +- pandas/src/testing.pyx | 83 +++++- pandas/tests/test_index.py | 35 ++- pandas/tests/test_testing.py | 353 +++++++++++++++++++++- pandas/util/testing.py | 357 ++++++++++++++++++----- 5 files changed, 763 insertions(+), 93 deletions(-) diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index c145c717df4c4..66c2bbde0b3f8 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -178,7 +178,10 @@ def _check_orient(df, orient, dtype=None, numpy=False, self.assertTrue(df.columns.equals(unser.columns)) elif orient == "values": # index and cols are not captured in this orientation - assert_almost_equal(df.values, unser.values) + if numpy is True and df.shape == (0, 0): + assert unser.shape[0] == 0 + else: + assert_almost_equal(df.values, unser.values) elif orient == "split": # index and col labels might not be strings unser.index = [str(i) for i in unser.index] @@ -670,15 +673,20 @@ def test_doc_example(self): def test_misc_example(self): # parsing unordered input fails - result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]',numpy=True) - expected = DataFrame([[1,2],[1,2]],columns=['a','b']) - with tm.assertRaisesRegexp(AssertionError, - '\[index\] left \[.+\], right \[.+\]'): + result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True) + expected = DataFrame([[1,2], [1,2]], columns=['a', 'b']) + + error_msg = """DataFrame\\.index are different + +DataFrame\\.index values are different \\(100\\.0 %\\) +\\[left\\]: Index\\(\\[u?'a', u?'b'\\], dtype='object'\\) +\\[right\\]: Int64Index\\(\\[0, 1\\], dtype='int64'\\)""" + with tm.assertRaisesRegexp(AssertionError, error_msg): assert_frame_equal(result, expected) result = 
read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]') - expected = DataFrame([[1,2],[1,2]],columns=['a','b']) - assert_frame_equal(result,expected) + expected = DataFrame([[1,2], [1,2]], columns=['a','b']) + assert_frame_equal(result, expected) @network def test_round_trip_exception_(self): @@ -739,3 +747,9 @@ def my_handler_raises(obj): raise TypeError("raisin") self.assertRaises(TypeError, DataFrame({'a': [1, 2, object()]}).to_json, default_handler=my_handler_raises) + + +if __name__ == '__main__': + import nose + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', + '--pdb-failure', '-s'], exit=False) \ No newline at end of file diff --git a/pandas/src/testing.pyx b/pandas/src/testing.pyx index 4977a80acc936..1abc758559e70 100644 --- a/pandas/src/testing.pyx +++ b/pandas/src/testing.pyx @@ -55,11 +55,39 @@ cpdef assert_dict_equal(a, b, bint compare_keys=True): return True -cpdef assert_almost_equal(a, b, bint check_less_precise=False): +cpdef assert_almost_equal(a, b, bint check_less_precise=False, + obj=None, lobj=None, robj=None): + """Check that left and right objects are almost equal. + + Parameters + ---------- + a : object + b : object + check_less_precise : bool, default False + Specify comparison precision. + 5 digits (False) or 3 digits (True) after decimal points are compared. 
+ obj : str, default None + Specify object name being compared, internally used to show appropriate + assertion message + lobj : str, default None + Specify left object name being compared, internally used to show + appropriate assertion message + robj : str, default None + Specify right object name being compared, internally used to show + appropriate assertion message + """ + cdef: int decimal + double diff = 0.0 Py_ssize_t i, na, nb double fa, fb + bint is_unequal = False + + if lobj is None: + lobj = a + if robj is None: + robj = b if isinstance(a, dict) or isinstance(b, dict): return assert_dict_equal(a, b) @@ -70,33 +98,62 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False): return True if isiterable(a): - assert isiterable(b), ( - "First object is iterable, second isn't: %r != %r" % (a, b) - ) + + if not isiterable(b): + from pandas.util.testing import raise_assert_detail + if obj is None: + obj = 'Iterable' + msg = "First object is iterable, second isn't" + raise_assert_detail(obj, msg, a, b) + assert has_length(a) and has_length(b), ( "Can't compare objects without length, one or both is invalid: " "(%r, %r)" % (a, b) ) - na, nb = len(a), len(b) - assert na == nb, ( - "Length of two iterators not the same: %r != %r" % (na, nb) - ) if isinstance(a, np.ndarray) and isinstance(b, np.ndarray): + if obj is None: + obj = 'numpy array' + na, nb = a.size, b.size + if a.shape != b.shape: + from pandas.util.testing import raise_assert_detail + raise_assert_detail(obj, '{0} shapes are different'.format(obj), + a.shape, b.shape) try: if np.array_equal(a, b): return True except: pass + else: + if obj is None: + obj = 'Iterable' + na, nb = len(a), len(b) + + if na != nb: + from pandas.util.testing import raise_assert_detail + raise_assert_detail(obj, '{0} length are different'.format(obj), + na, nb) + + for i in xrange(len(a)): + try: + assert_almost_equal(a[i], b[i], check_less_precise) + except AssertionError: + is_unequal = True + diff += 1 - for i in 
xrange(na): - assert_almost_equal(a[i], b[i], check_less_precise) + if is_unequal: + from pandas.util.testing import raise_assert_detail + msg = '{0} values are different ({1} %)'.format(obj, np.round(diff * 100.0 / na, 5)) + raise_assert_detail(obj, msg, lobj, robj) return True + elif isiterable(b): - assert False, ( - "Second object is iterable, first isn't: %r != %r" % (a, b) - ) + from pandas.util.testing import raise_assert_detail + if obj is None: + obj = 'Iterable' + msg = "Second object is iterable, first isn't" + raise_assert_detail(obj, msg, a, b) if isnull(a): assert isnull(b), ( diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index d6e57e76d0ec9..3c988943301c0 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -3371,7 +3371,10 @@ def test_inplace_mutation_resets_values(self): # make sure label setting works too labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] - exp_values = np.array([(long(1), 'a')] * 6, dtype=object) + exp_values = np.empty((6, ), dtype=object) + exp_values[:] = [(long(1), 'a')] * 6 + # must be 1d array of tuples + self.assertEqual(exp_values.shape, (6, )) new_values = mi2.set_labels(labels2).values # not inplace shouldn't change assert_almost_equal(mi2._tuples, vals2) @@ -4772,8 +4775,20 @@ def test_repr_roundtrip(self): mi = MultiIndex.from_product([list('ab'),range(3)],names=['first','second']) str(mi) - tm.assert_index_equal(eval(repr(mi)),mi,exact=True) - + + if compat.PY3: + tm.assert_index_equal(eval(repr(mi)), mi, exact=True) + else: + result = eval(repr(mi)) + # string coerces to unicode + tm.assert_index_equal(result, mi, exact=False) + self.assertEqual(mi.get_level_values('first').inferred_type, 'string') + self.assertEqual(result.get_level_values('first').inferred_type, 'unicode') + + mi_u = MultiIndex.from_product([list(u'ab'),range(3)],names=['first','second']) + result = eval(repr(mi_u)) + tm.assert_index_equal(result, mi_u, exact=True) + # formatting if compat.PY3: str(mi) 
@@ -4783,7 +4798,19 @@ def test_repr_roundtrip(self): # long format mi = MultiIndex.from_product([list('abcdefg'),range(10)],names=['first','second']) result = str(mi) - tm.assert_index_equal(eval(repr(mi)),mi,exact=True) + + if compat.PY3: + tm.assert_index_equal(eval(repr(mi)), mi, exact=True) + else: + result = eval(repr(mi)) + # string coerces to unicode + tm.assert_index_equal(result, mi, exact=False) + self.assertEqual(mi.get_level_values('first').inferred_type, 'string') + self.assertEqual(result.get_level_values('first').inferred_type, 'unicode') + + mi = MultiIndex.from_product([list(u'abcdefg'),range(10)],names=['first','second']) + result = eval(repr(mi_u)) + tm.assert_index_equal(result, mi_u, exact=True) def test_str(self): # tested elsewhere diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 668579911d6d5..f4fbc19535107 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -10,7 +10,8 @@ import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assertRaisesRegexp, raise_with_traceback, - assert_series_equal, assert_frame_equal, RNGContext + assert_index_equal, assert_series_equal, assert_frame_equal, + assert_numpy_array_equal, assert_isinstance, RNGContext ) # let's get meta. 
@@ -132,6 +133,275 @@ def test_raise_with_traceback(self): raise_with_traceback(e, traceback) +class TestAssertNumpyArrayEqual(tm.TestCase): + + def test_numpy_array_equal_message(self): + + expected = """numpy array are different + +numpy array shapes are different +\\[left\\]: \\(2,\\) +\\[right\\]: \\(3,\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5])) + + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5])) + + # scalar comparison + expected = """: 1 != 2""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(1, 2) + expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5""" + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(1, 2) + + # array / scalar array comparison + expected = """(numpy array|Iterable) are different + +First object is iterable, second isn't +\\[left\\]: \\[1\\] +\\[right\\]: 1""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([1]), 1) + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([1]), 1) + + # scalar / array comparison + expected = """(numpy array|Iterable) are different + +Second object is iterable, first isn't +\\[left\\]: 1 +\\[right\\]: \\[1\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(1, np.array([1])) + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(1, np.array([1])) + + expected = """numpy array are different + +numpy array values are different \\(66\\.66667 %\\) +\\[left\\]: \\[nan, 2\\.0, 3\\.0\\] +\\[right\\]: \\[1\\.0, nan, 3\\.0\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) 
+ + expected = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[1, 2\\] +\\[right\\]: \\[1, 3\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3])) + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([1, 2]), np.array([1, 3])) + + + expected = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[1\\.1, 2\\.000001\\] +\\[right\\]: \\[1\\.1, 2.0\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0])) + + # must pass + assert_almost_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0])) + + expected = """numpy array are different + +numpy array values are different \\(16\\.66667 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]), + np.array([[1, 3], [3, 4], [5, 6]])) + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]), + np.array([[1, 3], [3, 4], [5, 6]])) + + expected = """numpy array are different + +numpy array values are different \\(25\\.0 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([[1, 2], [3, 4]]), + np.array([[1, 3], [3, 4]])) + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([[1, 2], [3, 4]]), + np.array([[1, 3], [3, 4]])) + + # allow to overwrite message + expected = """Index are different + +Index shapes are different +\\[left\\]: \\(2,\\) +\\[right\\]: \\(3,\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]), + 
obj='Index') + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]), + obj='Index') + + def test_assert_almost_equal_iterable_message(self): + + expected = """Iterable are different + +Iterable length are different +\\[left\\]: 2 +\\[right\\]: 3""" + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal([1, 2], [3, 4, 5]) + + expected = """Iterable are different + +Iterable values are different \\(50\\.0 %\\) +\\[left\\]: \\[1, 2\\] +\\[right\\]: \\[1, 3\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_almost_equal([1, 2], [1, 3]) + + +class TestAssertIndexEqual(unittest.TestCase): + _multiprocess_can_split_ = True + + def test_index_equal_message(self): + + expected = """Index are different + +Index levels are different +\\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: 2, MultiIndex\\(levels=\\[\\[u?'A', u?'B'\\], \\[1, 2, 3, 4\\]\\], + labels=\\[\\[0, 0, 1, 1\\], \\[0, 1, 2, 3\\]\\]\\)""" + idx1 = pd.Index([1, 2, 3]) + idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, exact=False) + + + expected = """MultiIndex level \\[1\\] are different + +MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) +\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2), ('B', 3), ('B', 4)]) + idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, check_exact=False) + + expected = """Index are different + +Index length are different +\\[left\\]: 3, 
Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + idx1 = pd.Index([1, 2, 3]) + idx2 = pd.Index([1, 2, 3, 4]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, check_exact=False) + + expected = """Index are different + +Index classes are different +\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)""" + idx1 = pd.Index([1, 2, 3]) + idx2 = pd.Index([1, 2, 3.0]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, exact=True) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, exact=True, check_exact=False) + + expected = """Index are different + +Index values are different \\(33\\.33333 %\\) +\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)""" + idx1 = pd.Index([1, 2, 3.]) + idx2 = pd.Index([1, 2, 3.0000000001]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + + # must success + assert_index_equal(idx1, idx2, check_exact=False) + + expected = """Index are different + +Index values are different \\(33\\.33333 %\\) +\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)""" + idx1 = pd.Index([1, 2, 3.]) + idx2 = pd.Index([1, 2, 3.0001]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, check_exact=False) + # must success + assert_index_equal(idx1, idx2, check_exact=False, check_less_precise=True) + + expected = """Index are different + +Index values are different \\(33\\.33333 %\\) +\\[left\\]: 
Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)""" + idx1 = pd.Index([1, 2, 3]) + idx2 = pd.Index([1, 2, 4]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, check_less_precise=True) + + expected = """MultiIndex level \\[1\\] are different + +MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) +\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2), ('B', 3), ('B', 4)]) + idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2, check_exact=False) + + def test_index_equal_metadata_message(self): + + expected = """Index are different + +Attribute "names" are different +\\[left\\]: \\[None\\] +\\[right\\]: \\[u?'x'\\]""" + idx1 = pd.Index([1, 2, 3]) + idx2 = pd.Index([1, 2, 3], name='x') + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + + # same name, should pass + assert_index_equal(pd.Index([1, 2, 3], name=np.nan), + pd.Index([1, 2, 3], name=np.nan)) + assert_index_equal(pd.Index([1, 2, 3], name=pd.NaT), + pd.Index([1, 2, 3], name=pd.NaT)) + + + expected = """Index are different + +Attribute "names" are different +\\[left\\]: \\[nan\\] +\\[right\\]: \\[NaT\\]""" + idx1 = pd.Index([1, 2, 3], name=np.nan) + idx2 = pd.Index([1, 2, 3], name=pd.NaT) + with assertRaisesRegexp(AssertionError, expected): + assert_index_equal(idx1, idx2) + + class TestAssertSeriesEqual(tm.TestCase): _multiprocess_can_split_ = True @@ -191,6 +461,28 @@ def test_multiindex_dtype(self): {'a':[1.0,2.0],'b':[2.1,1.5],'c':['l1','l2']}, index=['a','b']) 
self._assert_not_equal(df1.c, df2.c, check_index_type=True) + def test_series_equal_message(self): + + expected = """Series are different + +Series length are different +\\[left\\]: 3, Int64Index\\(\\[0, 1, 2\\], dtype='int64'\\) +\\[right\\]: 4, Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3, 4])) + + + expected = """Series are different + +Series values are different \\(33\\.33333 %\\) +\\[left\\]: \\[1, 2, 3\\] +\\[right\\]: \\[1, 2, 4\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4])) + with assertRaisesRegexp(AssertionError, expected): + assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]), + check_less_precise=True) + class TestAssertFrameEqual(tm.TestCase): _multiprocess_can_split_ = True @@ -224,6 +516,65 @@ def test_empty_dtypes(self): self._assert_equal(df1, df2, check_dtype=False) self._assert_not_equal(df1, df2, check_dtype=True) + def test_frame_equal_message(self): + + expected = """DataFrame are different + +DataFrame shape \\(number of rows\\) are different +\\[left\\]: 3, Int64Index\\(\\[0, 1, 2\\], dtype='int64'\\) +\\[right\\]: 4, Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3]}), + pd.DataFrame({'A':[1, 2, 3, 4]})) + + + expected = """DataFrame are different + +DataFrame shape \\(number of columns\\) are different +\\[left\\]: 2, Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) +\\[right\\]: 1, Index\\(\\[u?'A'\\], dtype='object'\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}), + pd.DataFrame({'A':[1, 2, 3]})) + + + expected = """DataFrame\\.index are different + +DataFrame\\.index values are different \\(33\\.33333 %\\) +\\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], 
dtype='object'\\) +\\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}, + index=['a', 'b', 'c']), + pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}, + index=['a', 'b', 'd'])) + + expected = """DataFrame\\.columns are different + +DataFrame\\.columns values are different \\(50\\.0 %\\) +\\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) +\\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)""" + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}, + index=['a', 'b', 'c']), + pd.DataFrame({'A':[1, 2, 3], 'b':[4, 5, 6]}, + index=['a', 'b', 'c'])) + + + expected = """DataFrame\\.iloc\\[:, 1\\] are different + +DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\) +\\[left\\]: \\[4, 5, 6\\] +\\[right\\]: \\[4, 5, 7\\]""" + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}), + pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 7]})) + + with assertRaisesRegexp(AssertionError, expected): + assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}), + pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 7]}), + by_blocks=True) + class TestRNGContext(unittest.TestCase): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 979ac007c7500..4b7c8d4540e0f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -23,8 +23,9 @@ import numpy as np import pandas as pd -from pandas.core.common import (is_sequence, array_equivalent, is_list_like, is_number, - is_datetimelike_v_numeric, is_datetimelike_v_object) +from pandas.core.common import (is_sequence, array_equivalent, is_list_like, + is_datetimelike_v_numeric, is_datetimelike_v_object, + is_number, pprint_thing, take_1d) import pandas.compat as compat from pandas.compat import( filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, 
Counter, @@ -536,23 +537,128 @@ def assert_equal(a, b, msg=""): assert a == b, "%s: %r != %r" % (msg.format(a,b), a, b) -def assert_index_equal(left, right, exact=False, check_names=True): +def assert_index_equal(left, right, exact=False, check_names=True, + check_less_precise=False, check_exact=True, obj='Index'): + """Check that left and right Index are equal. + + Parameters + ---------- + left : Index + right : Index + exact : bool, default False + Whether to check the Index class, dtype and inferred_type are identical. + check_names : bool, default True + Whether to check the names attribute. + check_less_precise : bool, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + check_exact : bool, default True + Whether to compare number exactly. + obj : str, default 'Index' + Specify object name being compared, internally used to show appropriate + assertion message + """ + + def _check_types(l, r, obj='Index'): + if exact: + if type(l) != type(r): + msg = '{0} classes are different'.format(obj) + raise_assert_detail(obj, msg, l, r) + assert_attr_equal('dtype', l, r, obj=obj) + assert_attr_equal('inferred_type', l, r, obj=obj) + + def _get_ilevel_values(index, level): + # accept level number only + unique = index.levels[level] + labels = index.labels[level] + filled = take_1d(unique.values, labels, fill_value=unique._na_value) + values = unique._simple_new(filled, index.names[level], + freq=getattr(unique, 'freq', None), + tz=getattr(unique, 'tz', None)) + return values + + # instance validation assertIsInstance(left, Index, '[index] ') assertIsInstance(right, Index, '[index] ') - if not left.equals(right) or (exact and type(left) != type(right)): - raise AssertionError("[index] left [{0} {1}], right [{2} {3}]".format(left.dtype, - left, - right, - right.dtype)) + + # class / dtype comparison + _check_types(left, right) + + # level comparison + if left.nlevels 
!= right.nlevels: + raise_assert_detail(obj, '{0} levels are different'.format(obj), + '{0}, {1}'.format(left.nlevels, left), + '{0}, {1}'.format(right.nlevels, right)) + + # length comparison + if len(left) != len(right): + raise_assert_detail(obj, '{0} length are different'.format(obj), + '{0}, {1}'.format(len(left), left), + '{0}, {1}'.format(len(right), right)) + + # MultiIndex special comparison for little-friendly error messages + if left.nlevels > 1: + for level in range(left.nlevels): + # cannot use get_level_values here because it can change dtype + llevel = _get_ilevel_values(left, level) + rlevel = _get_ilevel_values(right, level) + + lobj = 'MultiIndex level [{0}]'.format(level) + assert_index_equal(llevel, rlevel, + exact=exact, check_names=check_names, + check_less_precise=check_less_precise, + check_exact=check_exact, obj=lobj) + # get_level_values may change dtype + _check_types(left.levels[level], right.levels[level], obj=obj) + + if check_exact: + if not left.equals(right): + diff = np.sum((left.values != right.values).astype(int)) * 100.0 / len(left) + msg = '{0} values are different ({1} %)'.format(obj, np.round(diff, 5)) + raise_assert_detail(obj, msg, left, right) + else: + assert_almost_equal(left.values, right.values, + check_less_precise=check_less_precise, + obj=obj, lobj=left, robj=right) + + # metadata comparison if check_names: - assert_attr_equal('names', left, right) + assert_attr_equal('names', left, right, obj=obj) + +def assert_attr_equal(attr, left, right, obj='Attributes'): + """checks attributes are equal. Both objects must have attribute. + + Parameters + ---------- + attr : str + Attribute name being compared. + left : object + right : object + obj : str, default 'Attributes' + Specify object name being compared, internally used to show appropriate + assertion message + """ -def assert_attr_equal(attr, left, right): - """checks attributes are equal. 
Both objects must have attribute.""" left_attr = getattr(left, attr) right_attr = getattr(right, attr) - assert_equal(left_attr,right_attr,"attr is not equal [{0}]" .format(attr)) + + if left_attr is right_attr: + return True + elif (is_number(left_attr) and np.isnan(left_attr) and + is_number(right_attr) and np.isnan(right_attr)): + # np.nan + return True + + result = left_attr == right_attr + if not isinstance(result, bool): + result = result.all() + + if result: + return True + else: + raise_assert_detail(obj, 'Attribute "{0}" are different'.format(attr), + left_attr, right_attr) def isiterable(obj): @@ -607,6 +713,7 @@ def assertIsInstance(obj, cls, msg=''): def assert_isinstance(obj, class_type_or_tuple, msg=''): return deprecate('assert_isinstance', assertIsInstance)(obj, class_type_or_tuple, msg=msg) + def assertNotIsInstance(obj, cls, msg=''): """Test that obj is not an instance of cls (which can be a class or a tuple of classes, @@ -630,8 +737,23 @@ def assert_categorical_equal(res, exp): raise AssertionError("ordered not the same") -def assert_numpy_array_equal(np_array, assert_equal, - strict_nan=False, err_msg=None): +def raise_assert_detail(obj, message, left, right): + if isinstance(left, np.ndarray): + left = pprint_thing(left) + if isinstance(right, np.ndarray): + right = pprint_thing(right) + + msg = """{0} are different + +{1} +[left]: {2} +[right]: {3}""".format(obj, message, left, right) + raise AssertionError(msg) + + +def assert_numpy_array_equal(left, right, + strict_nan=False, err_msg=None, + obj='numpy array'): """Checks that 'np_array' is equivalent to 'assert_equal'. This is similar to ``numpy.testing.assert_array_equal``, but can @@ -639,10 +761,42 @@ def assert_numpy_array_equal(np_array, assert_equal, equivalent if the arrays have equal non-NaN elements, and `np.nan` in corresponding locations. 
""" - if array_equivalent(np_array, assert_equal, strict_nan=strict_nan): + + # compare shape and values + if array_equivalent(left, right, strict_nan=strict_nan): return + if err_msg is None: - err_msg = '{0} is not equivalent to {1}.'.format(np_array, assert_equal) + # show detailed error + + if np.isscalar(left) and np.isscalar(right): + # show scalar comparison error + assert_equal(left, right) + elif is_list_like(left) and is_list_like(right): + # some test cases pass list + left = np.asarray(left) + right = np.array(right) + + if left.shape != right.shape: + raise_assert_detail(obj, '{0} shapes are different'.format(obj), + left.shape, right.shape) + + diff = 0 + for l, r in zip(left, right): + # count up differences + if not array_equivalent(l, r, strict_nan=strict_nan): + diff += 1 + + diff = diff * 100.0 / left.size + msg = '{0} values are different ({1} %)'.format(obj, np.round(diff, 5)) + raise_assert_detail(obj, msg, left, right) + elif is_list_like(left): + msg = "First object is iterable, second isn't" + raise_assert_detail(obj, msg, left, right) + else: + msg = "Second object is iterable, first isn't" + raise_assert_detail(obj, msg, left, right) + raise AssertionError(err_msg) @@ -651,17 +805,62 @@ def assert_series_equal(left, right, check_dtype=True, check_index_type=False, check_series_type=False, check_less_precise=False, - check_exact=False, check_names=True, - check_datetimelike_compat=False): + check_exact=False, + check_datetimelike_compat=False, + obj='Series'): + + """Check that left and right Series are equal. + + Parameters + ---------- + left : Series + right : Series + check_dtype : bool, default True + Whether to check the Series dtype is identical. + check_index_type : bool, default False + Whether to check the Index class, dtype and inferred_type are identical. + check_series_type : bool, default False + Whether to check the Series class is identical. + check_less_precise : bool, default False + Specify comparison precision. 
Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + check_exact : bool, default False + Whether to compare number exactly. + check_names : bool, default True + Whether to check the Series and Index names attribute. + check_dateteimelike_compat : bool, default False + Compare datetime-like which is comparable ignoring dtype. + obj : str, default 'Series' + Specify object name being compared, internally used to show appropriate + assertion message + """ + + # instance validation + assertIsInstance(left, Series, '[Series] ') + assertIsInstance(right, Series, '[Series] ') + if check_series_type: assertIsInstance(left, type(right)) + + # length comparison + if len(left) != len(right): + raise_assert_detail(obj, 'Series length are different', + '{0}, {1}'.format(len(left), left.index), + '{0}, {1}'.format(len(right), right.index)) + + # index comparison + assert_index_equal(left.index, right.index, exact=check_index_type, + check_names=check_names, + check_less_precise=check_less_precise, check_exact=check_exact, + obj='{0}.index'.format(obj)) + if check_dtype: assert_attr_equal('dtype', left, right) + if check_exact: - if not np.array_equal(left.values, right.values): - raise AssertionError('{0} is not equal to {1}.'.format(left.values, - right.values)) + assert_numpy_array_equal(left.get_values(), right.get_values(), + obj='{0}'.format(obj)) elif check_datetimelike_compat: # we want to check only if we have compat dtypes # e.g. 
integer and M|m are NOT compat, but we can simply check the values in that case @@ -675,27 +874,12 @@ def assert_series_equal(left, right, check_dtype=True, else: assert_numpy_array_equal(left.values, right.values) else: - assert_almost_equal(left.values, right.values, check_less_precise) - if check_less_precise: - assert_almost_equal( - left.index.values, right.index.values, check_less_precise) - else: - assert_index_equal(left.index, right.index, check_names=check_names) - if check_index_type: - for level in range(left.index.nlevels): - lindex = left.index.get_level_values(level) - rindex = right.index.get_level_values(level) - assertIsInstance(lindex, type(rindex)) - assert_attr_equal('dtype', lindex, rindex) - assert_attr_equal('inferred_type', lindex, rindex) + assert_almost_equal(left.get_values(), right.get_values(), + check_less_precise, obj='{0}'.format(obj)) + + # metadata comparison if check_names: - if is_number(left.name) and np.isnan(left.name): - # Series.name can be np.nan in some test cases - assert is_number(right.name) and np.isnan(right.name) - elif left.name is pd.NaT: - assert right.name is pd.NaT - else: - assert_attr_equal('name', left, right) + assert_attr_equal('name', left, right, obj=obj) # This could be refactored to use the NDFrame.equals method @@ -707,19 +891,69 @@ def assert_frame_equal(left, right, check_dtype=True, check_names=True, by_blocks=False, check_exact=False, - check_datetimelike_compat=False): + check_datetimelike_compat=False, + obj='DataFrame'): + + """Check that left and right DataFrame are equal. + + Parameters + ---------- + left : DataFrame + right : DataFrame + check_dtype : bool, default True + Whether to check the DataFrame dtype is identical. + check_index_type : bool, default False + Whether to check the Index class, dtype and inferred_type are identical. + check_column_type : bool, default False + Whether to check the columns class, dtype and inferred_type are identical. 
+ check_frame_type : bool, default False + Whether to check the DataFrame class is identical. + check_less_precise : bool, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + check_names : bool, default True + Whether to check the Index names attribute. + by_blocks : bool, default False + Specify how to compare internal data. If False, compare by columns. + If True, compare by blocks. + check_exact : bool, default False + Whether to compare number exactly. + check_datetimelike_compat : bool, default False + Compare datetime-like which is comparable ignoring dtype. + obj : str, default 'DataFrame' + Specify object name being compared, internally used to show appropriate + assertion message + """ + + # instance validation + assertIsInstance(left, DataFrame, '[DataFrame] ') + assertIsInstance(right, DataFrame, '[DataFrame] ') + if check_frame_type: assertIsInstance(left, type(right)) - assertIsInstance(left, DataFrame) - assertIsInstance(right, DataFrame) - if check_less_precise: - if not by_blocks: - assert_almost_equal(left.columns, right.columns) - assert_almost_equal(left.index, right.index) - else: - if not by_blocks: - assert_index_equal(left.columns, right.columns, check_names=check_names) + # shape comparison (row) + if left.shape[0] != right.shape[0]: + raise_assert_detail(obj, 'DataFrame shape (number of rows) are different', + '{0}, {1}'.format(left.shape[0], left.index), + '{0}, {1}'.format(right.shape[0], right.index)) + # shape comparison (columns) + if left.shape[1] != right.shape[1]: + raise_assert_detail(obj, 'DataFrame shape (number of columns) are different', + '{0}, {1}'.format(left.shape[1], left.columns), + '{0}, {1}'.format(right.shape[1], right.columns)) + + # index comparison + assert_index_equal(left.index, right.index, exact=check_index_type, + check_names=check_names, + check_less_precise=check_less_precise, check_exact=check_exact, +
obj='{0}.index'.format(obj)) + + # column comparison + assert_index_equal(left.columns, right.columns, exact=check_column_type, + check_names=check_names, + check_less_precise=check_less_precise, check_exact=check_exact, + obj='{0}.columns'.format(obj)) # compare by blocks if by_blocks: @@ -728,7 +962,8 @@ def assert_frame_equal(left, right, check_dtype=True, for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): assert dtype in lblocks assert dtype in rblocks - assert_frame_equal(lblocks[dtype],rblocks[dtype], check_dtype=check_dtype) + assert_frame_equal(lblocks[dtype], rblocks[dtype], + check_dtype=check_dtype, obj='DataFrame.blocks') # compare by columns else: @@ -742,22 +977,8 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=check_less_precise, check_exact=check_exact, check_names=check_names, - check_datetimelike_compat=check_datetimelike_compat) - - if check_index_type: - for level in range(left.index.nlevels): - lindex = left.index.get_level_values(level) - rindex = right.index.get_level_values(level) - assertIsInstance(lindex, type(rindex)) - assert_attr_equal('dtype', lindex, rindex) - assert_attr_equal('inferred_type', lindex, rindex) - if check_column_type: - assertIsInstance(left.columns, type(right.columns)) - assert_attr_equal('dtype', left.columns, right.columns) - assert_attr_equal('inferred_type', left.columns, right.columns) - if check_names: - assert_attr_equal('names', left.index, right.index) - assert_attr_equal('names', left.columns, right.columns) + check_datetimelike_compat=check_datetimelike_compat, + obj='DataFrame.iloc[:, {0}]'.format(i)) def assert_panelnd_equal(left, right,