From ce7bd745ed84471d10bbd0a5ccee06c077e8b086 Mon Sep 17 00:00:00 2001 From: jojomdt Date: Fri, 24 Feb 2017 16:26:19 +0800 Subject: [PATCH 1/5] reindex_like after shape comparison in assert_frame_equal, if check_like, the former code reindex_like before shape comparison. for example: if left.shape=(2,2), right.shape=(2,0), after reindex_like, left.shape=(2,0), right.shape=(2,0), then the shape comparison will get the wrong result. --- pandas/util/testing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 1bd539469dbe3..87ac1faad9212 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1270,9 +1270,6 @@ def assert_frame_equal(left, right, check_dtype=True, assertIsInstance(left, type(right)) # assert_class_equal(left, right, obj=obj) - if check_like: - left, right = left.reindex_like(right), right - # shape comparison (row) if left.shape[0] != right.shape[0]: raise_assert_detail(obj, @@ -1288,6 +1285,9 @@ def assert_frame_equal(left, right, check_dtype=True, left.columns), '{0}, {1}'.format(right.shape[1], right.columns)) + + if check_like: + left, right = left.reindex_like(right), right # index comparison assert_index_equal(left.index, right.index, exact=check_index_type, From 470dbaa6cd5816fa2797d6f919d76c499df5bfec Mon Sep 17 00:00:00 2001 From: jojomdt Date: Fri, 24 Feb 2017 23:48:35 +0800 Subject: [PATCH 2/5] combine row and column shape comparison --- pandas/util/testing.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 87ac1faad9212..625ece8df594a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1270,21 +1270,12 @@ def assert_frame_equal(left, right, check_dtype=True, assertIsInstance(left, type(right)) # assert_class_equal(left, right, obj=obj) - # shape comparison (row) - if left.shape[0] != right.shape[0]: + # shape comparison + if left.shape != right.shape: 
raise_assert_detail(obj, - 'DataFrame shape (number of rows) are different', - '{0}, {1}'.format(left.shape[0], left.index), - '{0}, {1}'.format(right.shape[0], right.index)) - # shape comparison (columns) - if left.shape[1] != right.shape[1]: - raise_assert_detail(obj, - 'DataFrame shape (number of columns) ' - 'are different', - '{0}, {1}'.format(left.shape[1], - left.columns), - '{0}, {1}'.format(right.shape[1], - right.columns)) + 'DataFrame shape mismatch', + '({0}, {1})'.format(*left.shape), + '({0}, {1})'.format(*right.shape)) if check_like: left, right = left.reindex_like(right), right From c03e0af0fe8448c09a23f0d5265e8c59a8b08d71 Mon Sep 17 00:00:00 2001 From: jojomdt Date: Sat, 25 Feb 2017 00:56:38 +0800 Subject: [PATCH 3/5] add test for TestAssertFrameEqual add test_equal_with_different_row_order, change shape error message test --- pandas/tests/test_testing.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 07bfdc8fc9078..cd6fef8ef7403 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -593,6 +593,17 @@ def _assert_equal(self, x, y, **kwargs): def _assert_not_equal(self, a, b, **kwargs): self.assertRaises(AssertionError, assert_frame_equal, a, b, **kwargs) self.assertRaises(AssertionError, assert_frame_equal, b, a, **kwargs) + + def test_equal_with_different_row_order(self): + self._assert_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + index=['a', 'b', 'c']), + pd.DataFrame({'A': [3, 2, 1], 'B': [6, 5, 4]}, + index=['c', 'b', 'a']), + check_like=True) + + def test_not_equal_with_different_shape(self): + self._assert_not_equal(pd.DataFrame({'A': [1, 2, 3]}), + pd.DataFrame({'A': [1, 2, 3, 4]})) def test_index_dtype(self): df1 = DataFrame.from_records( @@ -621,19 +632,9 @@ def test_frame_equal_message(self): expected = """DataFrame are different -DataFrame shape \\(number of rows\\) are different 
-\\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) -\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" - - with assertRaisesRegexp(AssertionError, expected): - assert_frame_equal(pd.DataFrame({'A': [1, 2, 3]}), - pd.DataFrame({'A': [1, 2, 3, 4]})) - - expected = """DataFrame are different - -DataFrame shape \\(number of columns\\) are different -\\[left\\]: 2, Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) -\\[right\\]: 1, Index\\(\\[u?'A'\\], dtype='object'\\)""" +DataFrame shape mismatch +\\[left\\]: \\(3, 2\\) +\\[right\\]: \\(4, 1\\)""" with assertRaisesRegexp(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), From 0340b5c3d23cfae3b2b8ecb32a1052a71fbfe19b Mon Sep 17 00:00:00 2001 From: jojomdt Date: Sat, 25 Feb 2017 00:58:36 +0800 Subject: [PATCH 4/5] change check_like description change it to ''If true, ignore the order of rows & columns'' --- pandas/util/testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 625ece8df594a..5c6bab808fdc2 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1254,7 +1254,7 @@ def assert_frame_equal(left, right, check_dtype=True, check_categorical : bool, default True Whether to compare internal Categorical exactly. 
check_like : bool, default False - If true, then reindex_like operands + If true, ignore the order of rows & columns obj : str, default 'DataFrame' Specify object name being compared, internally used to show appropriate assertion message From 7b3437b0eb228f020871046754d3bd38492f179e Mon Sep 17 00:00:00 2001 From: jojomdt Date: Sun, 26 Feb 2017 17:22:31 +0800 Subject: [PATCH 5/5] fix test_frame_equal_message error change \\[right\\]: \\(4, 1\\) to \\[right\\]: \\(3, 1\\) --- pandas/tests/test_testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index cd6fef8ef7403..38c11e51b5bde 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -634,7 +634,7 @@ def test_frame_equal_message(self): DataFrame shape mismatch \\[left\\]: \\(3, 2\\) -\\[right\\]: \\(4, 1\\)""" +\\[right\\]: \\(3, 1\\)""" with assertRaisesRegexp(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}),