From 9cd83ff2dd2c1defd16b321e41411ce112974108 Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Tue, 3 Apr 2018 14:13:08 +0100
Subject: [PATCH 01/11] Safely raise errors when object contains unicode

This safely turns np.ndarray objects that contain unicode into a
representation that can be printed.
---
 pandas/util/testing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 6e13a17eba68c..cb3cc0e1c6a76 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -989,11 +989,11 @@ def assert_categorical_equal(left, right, check_dtype=True,
 
 def raise_assert_detail(obj, message, left, right, diff=None):
     if isinstance(left, np.ndarray):
-        left = pprint_thing(left)
+        left = repr(pprint_thing(left))
     elif is_categorical_dtype(left):
         left = repr(left)
     if isinstance(right, np.ndarray):
-        right = pprint_thing(right)
+        right = repr(pprint_thing(right))
     elif is_categorical_dtype(right):
         right = repr(right)
 

From 46cadc0b6d4ec7b1d969701f59bf0e0c9a885634 Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Wed, 4 Apr 2018 11:35:22 +0100
Subject: [PATCH 02/11] Whatsnew entry

---
 doc/source/whatsnew/v0.23.0.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 09bd09b06d9b9..6571c1d3690ad 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -1165,3 +1165,4 @@ Other
 
 - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`)
 - Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existant option key in some cases (:issue:`19789`)
+- Bug in :func:`raise_assert_detail` for Series and DataFrames with differing unicode data (:issue:`20503`)

From 99ac0e8634d603d22c9d1635914ce70a48eb5939 Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Thu, 5 Apr 2018 15:58:24 +0100
Subject: [PATCH 03/11] Tests for comparisons of objects containing unicode

---
 pandas/tests/util/test_testing.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py
index 1c878604b11a2..7c5e95bcd7f76 100644
--- a/pandas/tests/util/test_testing.py
+++ b/pandas/tests/util/test_testing.py
@@ -276,6 +276,19 @@ def test_numpy_array_equal_message(self):
             assert_almost_equal(np.array([[1, 2], [3, 4]]),
                                 np.array([[1, 3], [3, 4]]))
 
+        expected = """numpy array are different
+
+numpy array values are different \\(33\\.33333 %\\)
+\\[left\\]:  \\[á, à, ä\\]
+\\[right\\]: \\[á, à, å\\]"""
+
+        with tm.assert_raises_regex(AssertionError, expected):
+            assert_numpy_array_equal(np.array([u"á", u"à", u"ä"]),
+                                     np.array([u"á", u"à", u"å"]))
+        with tm.assert_raises_regex(AssertionError, expected):
+            assert_almost_equal(np.array([u"á", u"à", u"ä"]),
+                                np.array([u"á", u"à", u"å"]))
+
         # allow to overwrite message
         expected = """Index are different
 
@@ -499,10 +512,12 @@ def _assert_not_equal(self, a, b, **kwargs):
     def test_equal(self):
         self._assert_equal(Series(range(3)), Series(range(3)))
         self._assert_equal(Series(list('abc')), Series(list('abc')))
+        self._assert_equal(Series(list(u'áàä')), Series(list(u'áàä')))
 
     def test_not_equal(self):
         self._assert_not_equal(Series(range(3)), Series(range(3)) + 1)
         self._assert_not_equal(Series(list('abc')), Series(list('xyz')))
+        self._assert_not_equal(Series(list(u'áàä')), Series(list(u'éèë')))
         self._assert_not_equal(Series(range(3)), Series(range(4)))
         self._assert_not_equal(
             Series(range(3)), Series(
@@ -678,6 +693,21 @@ def test_frame_equal_message(self):
                                pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}),
                                by_blocks=True)
 
+        expected = """DataFrame\\.iloc\\[:, 1\\] are different
+
+DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
+\\[left\\]:  \\[é, è, ë\\]
+\\[right\\]: \\[é, è, e̊\\]"""
+
+        with tm.assert_raises_regex(AssertionError, expected):
+            assert_frame_equal(pd.DataFrame({'A': [u"á", u"à", u"ä"], 'E': [u"é", u"è", u"ë"]}),
+                               pd.DataFrame({'A': [u"á", u"à", u"ä"], 'E': [u"é", u"è", u"e̊"]}))
+
+        with tm.assert_raises_regex(AssertionError, expected):
+            assert_frame_equal(pd.DataFrame({'A': [u"á", u"à", u"ä"], 'E': [u"é", u"è", u"ë"]}),
+                               pd.DataFrame({'A': [u"á", u"à", u"ä"], 'E': [u"é", u"è", u"e̊"]}),
+                               by_blocks=True)
+
 
 class TestAssertCategoricalEqual(object):
 

From 329002c5a38fe217e1fb47337c82a988a11a27d6 Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Thu, 5 Apr 2018 15:59:27 +0100
Subject: [PATCH 04/11] Only need to pprint with the display encoding

---
 pandas/util/testing.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index cb3cc0e1c6a76..9582947945c0f 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -31,7 +31,7 @@
     is_interval_dtype,
     is_sequence,
     is_list_like)
-from pandas.io.formats.printing import pprint_thing
+from pandas.io.formats.printing import pprint_thing_encoded
 from pandas.core.algorithms import take_1d
 import pandas.core.common as com
 
@@ -989,11 +989,11 @@ def assert_categorical_equal(left, right, check_dtype=True,
 
 def raise_assert_detail(obj, message, left, right, diff=None):
     if isinstance(left, np.ndarray):
-        left = repr(pprint_thing(left))
+        left = pprint_thing_encoded(left, encoding=pd.options.display.encoding)
     elif is_categorical_dtype(left):
         left = repr(left)
     if isinstance(right, np.ndarray):
-        right = repr(pprint_thing(right))
+        right = pprint_thing_encoded(right, encoding=pd.options.display.encoding)
     elif is_categorical_dtype(right):
         right = repr(right)
 

From b506cf6ae2f8abca1119bc8a88eaa0e303063a63 Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Thu, 5 Apr 2018 16:09:20 +0100
Subject: [PATCH 05/11] Linting

---
 pandas/tests/util/test_testing.py | 20 ++++++++++++--------
 pandas/util/testing.py            |  6 ++++--
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py
index 7c5e95bcd7f76..7d14b99d611f9 100644
--- a/pandas/tests/util/test_testing.py
+++ b/pandas/tests/util/test_testing.py
@@ -283,11 +283,11 @@ def test_numpy_array_equal_message(self):
 \\[right\\]: \\[á, à, å\\]"""
 
         with tm.assert_raises_regex(AssertionError, expected):
-            assert_numpy_array_equal(np.array([u"á", u"à", u"ä"]),
-                                     np.array([u"á", u"à", u"å"]))
+            assert_numpy_array_equal(np.array([u'á', u'à', u'ä']),
+                                     np.array([u'á', u'à', u'å']))
         with tm.assert_raises_regex(AssertionError, expected):
-            assert_almost_equal(np.array([u"á", u"à", u"ä"]),
-                                np.array([u"á", u"à", u"å"]))
+            assert_almost_equal(np.array([u'á', u'à', u'ä']),
+                                np.array([u'á', u'à', u'å']))
 
         # allow to overwrite message
         expected = """Index are different
@@ -700,12 +700,16 @@ def test_frame_equal_message(self):
 \\[right\\]: \\[é, è, e̊\\]"""
 
         with tm.assert_raises_regex(AssertionError, expected):
-            assert_frame_equal(pd.DataFrame({'A': [u"á", u"à", u"ä"], 'E': [u"é", u"è", u"ë"]}),
-                               pd.DataFrame({'A': [u"á", u"à", u"ä"], 'E': [u"é", u"è", u"e̊"]}))
+            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
+                                             'E': [u'é', u'è', u'ë']}),
+                               pd.DataFrame({'A': [u'á', u'à', u'ä'],
+                                             'E': [u'é', u'è', u'e̊']}))
 
         with tm.assert_raises_regex(AssertionError, expected):
-            assert_frame_equal(pd.DataFrame({'A': [u"á", u"à", u"ä"], 'E': [u"é", u"è", u"ë"]}),
-                               pd.DataFrame({'A': [u"á", u"à", u"ä"], 'E': [u"é", u"è", u"e̊"]}),
+            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
+                                             'E': [u'é', u'è', u'ë']}),
+                               pd.DataFrame({'A': [u'á', u'à', u'ä'],
+                                             'E': [u'é', u'è', u'e̊']}),
                                by_blocks=True)
 
 
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 9582947945c0f..2f4375f98f702 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -989,11 +989,13 @@ def assert_categorical_equal(left, right, check_dtype=True,
 
 def raise_assert_detail(obj, message, left, right, diff=None):
     if isinstance(left, np.ndarray):
-        left = pprint_thing_encoded(left, encoding=pd.options.display.encoding)
+        left = pprint_thing_encoded(left,
+                                    encoding=pd.options.display.encoding)
     elif is_categorical_dtype(left):
         left = repr(left)
     if isinstance(right, np.ndarray):
-        right = pprint_thing_encoded(right, encoding=pd.options.display.encoding)
+        right = pprint_thing_encoded(right,
+                                     encoding=pd.options.display.encoding)
     elif is_categorical_dtype(right):
         right = repr(right)
 

From 8f607c3b0e78858ea3cde8148854e90892f33f4e Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Thu, 5 Apr 2018 16:27:46 +0100
Subject: [PATCH 06/11] Whatsnew update

---
 doc/source/whatsnew/v0.23.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 6571c1d3690ad..ca46f94752731 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -1165,4 +1165,4 @@ Other
 
 - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`)
 - Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existant option key in some cases (:issue:`19789`)
-- Bug in :func:`raise_assert_detail` for Series and DataFrames with differing unicode data (:issue:`20503`)
+- Bug in :func:`assert_series_equal` and :func:`assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`)

From 6b087f286a88fc086144e5871b1540b0a464686d Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Thu, 5 Apr 2018 16:46:39 +0100
Subject: [PATCH 07/11] Separate tests and document gh issue

---
 pandas/tests/util/test_testing.py | 36 ++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py
index 7d14b99d611f9..4668e1bec43e3 100644
--- a/pandas/tests/util/test_testing.py
+++ b/pandas/tests/util/test_testing.py
@@ -276,19 +276,6 @@ def test_numpy_array_equal_message(self):
             assert_almost_equal(np.array([[1, 2], [3, 4]]),
                                 np.array([[1, 3], [3, 4]]))
 
-        expected = """numpy array are different
-
-numpy array values are different \\(33\\.33333 %\\)
-\\[left\\]:  \\[á, à, ä\\]
-\\[right\\]: \\[á, à, å\\]"""
-
-        with tm.assert_raises_regex(AssertionError, expected):
-            assert_numpy_array_equal(np.array([u'á', u'à', u'ä']),
-                                     np.array([u'á', u'à', u'å']))
-        with tm.assert_raises_regex(AssertionError, expected):
-            assert_almost_equal(np.array([u'á', u'à', u'ä']),
-                                np.array([u'á', u'à', u'å']))
-
         # allow to overwrite message
         expected = """Index are different
 
@@ -303,6 +290,24 @@ def test_numpy_array_equal_message(self):
             assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]),
                                 obj='Index')
 
+    def test_numpy_array_equal_unicode_message(self):
+        # Test ensures that `assert_numpy_array_equal` raises the right
+        # exception when comparing np.arrays containing differing
+        # unicode objects (#20503)
+
+        expected = """numpy array are different
+
+numpy array values are different \\(33\\.33333 %\\)
+\\[left\\]:  \\[á, à, ä\\]
+\\[right\\]: \\[á, à, å\\]"""
+
+        with tm.assert_raises_regex(AssertionError, expected):
+            assert_numpy_array_equal(np.array([u'á', u'à', u'ä']),
+                                     np.array([u'á', u'à', u'å']))
+        with tm.assert_raises_regex(AssertionError, expected):
+            assert_almost_equal(np.array([u'á', u'à', u'ä']),
+                                np.array([u'á', u'à', u'å']))
+
     @td.skip_if_windows
     def test_numpy_array_equal_object_message(self):
 
@@ -693,6 +698,11 @@ def test_frame_equal_message(self):
                                pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}),
                                by_blocks=True)
 
+    def test_frame_equal_message_unicode(self):
+        # Test ensures that `assert_frame_equal` raises the right
+        # exception when comparing DataFrames containing differing
+        # unicode objects (#20503)
+
         expected = """DataFrame\\.iloc\\[:, 1\\] are different
 
 DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)

From 51366f272ee2e4bd03820a91a3668e6e3c13a998 Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Mon, 9 Apr 2018 16:25:43 +0100
Subject: [PATCH 08/11] Encode in utf-8 only in python2

---
 pandas/util/testing.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 2f4375f98f702..007c751687c35 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -31,7 +31,7 @@
     is_interval_dtype,
     is_sequence,
     is_list_like)
-from pandas.io.formats.printing import pprint_thing_encoded
+from pandas.io.formats.printing import pprint_thing
 from pandas.core.algorithms import take_1d
 import pandas.core.common as com
 
@@ -989,16 +989,21 @@ def assert_categorical_equal(left, right, check_dtype=True,
 
 def raise_assert_detail(obj, message, left, right, diff=None):
     if isinstance(left, np.ndarray):
-        left = pprint_thing_encoded(left,
-                                    encoding=pd.options.display.encoding)
+        left = pprint_thing(left)
     elif is_categorical_dtype(left):
         left = repr(left)
+
+    if compat.PY2 and isinstance(left, compat.string_types):
+        left = left.encode('utf-8')
+
     if isinstance(right, np.ndarray):
-        right = pprint_thing_encoded(right,
-                                     encoding=pd.options.display.encoding)
+        right = pprint_thing(right)
     elif is_categorical_dtype(right):
         right = repr(right)
 
+    if compat.PY2 and isinstance(right, compat.string_types):
+        right = right.encode('utf-8')
+
     msg = """{obj} are different
 
 {message}

From 575b2e8edf19559e8e9ad361fcef210d35786c73 Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Mon, 9 Apr 2018 18:02:36 +0100
Subject: [PATCH 09/11] import compat.PY2 and compat.string_types directly

---
 pandas/util/testing.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 007c751687c35..13250d8440149 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -38,7 +38,7 @@
 import pandas.compat as compat
 from pandas.compat import (
     filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter,
-    raise_with_traceback, httplib, StringIO, PY3)
+    raise_with_traceback, httplib, StringIO, string_types, PY3, PY2)
 
 from pandas import (bdate_range, CategoricalIndex, Categorical, IntervalIndex,
                     DatetimeIndex, TimedeltaIndex, PeriodIndex, RangeIndex,
@@ -993,7 +993,7 @@ def raise_assert_detail(obj, message, left, right, diff=None):
     elif is_categorical_dtype(left):
         left = repr(left)
 
-    if compat.PY2 and isinstance(left, compat.string_types):
+    if PY2 and isinstance(left, string_types):
         left = left.encode('utf-8')
 
     if isinstance(right, np.ndarray):
@@ -1001,7 +1001,7 @@ def raise_assert_detail(obj, message, left, right, diff=None):
     elif is_categorical_dtype(right):
         right = repr(right)
 
-    if compat.PY2 and isinstance(right, compat.string_types):
+    if PY2 and isinstance(right, string_types):
         right = right.encode('utf-8')
 
     msg = """{obj} are different

From 45d2b8e9990a3b5df851c3b373ea70ed8b801b83 Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Mon, 9 Apr 2018 18:04:35 +0100
Subject: [PATCH 10/11] Added documenting comments

---
 pandas/util/testing.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 13250d8440149..e1484a9c1b390 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -994,6 +994,7 @@ def raise_assert_detail(obj, message, left, right, diff=None):
         left = repr(left)
 
     if PY2 and isinstance(left, string_types):
+        # left needs to be printable in native text type in python2
         left = left.encode('utf-8')
 
     if isinstance(right, np.ndarray):
@@ -1002,6 +1003,7 @@ def raise_assert_detail(obj, message, left, right, diff=None):
         right = repr(right)
 
     if PY2 and isinstance(right, string_types):
+        # right needs to be printable in native text type in python2
         right = right.encode('utf-8')
 
     msg = """{obj} are different

From 1f7e231fd329330f4a9dbfe33aeafa10968d0ee7 Mon Sep 17 00:00:00 2001
From: Alejandro Giacometti <alejandro.giacometti@gmail.com>
Date: Mon, 9 Apr 2018 18:33:33 +0100
Subject: [PATCH 11/11] Add binary <-> unicode tests

---
 pandas/tests/util/test_testing.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py
index 4668e1bec43e3..d6f58d16bcf64 100644
--- a/pandas/tests/util/test_testing.py
+++ b/pandas/tests/util/test_testing.py
@@ -523,6 +523,7 @@ def test_not_equal(self):
         self._assert_not_equal(Series(range(3)), Series(range(3)) + 1)
         self._assert_not_equal(Series(list('abc')), Series(list('xyz')))
         self._assert_not_equal(Series(list(u'áàä')), Series(list(u'éèë')))
+        self._assert_not_equal(Series(list(u'áàä')), Series(list(b'aaa')))
         self._assert_not_equal(Series(range(3)), Series(range(4)))
         self._assert_not_equal(
             Series(range(3)), Series(
@@ -722,6 +723,25 @@ def test_frame_equal_message_unicode(self):
                                              'E': [u'é', u'è', u'e̊']}),
                                by_blocks=True)
 
+        expected = """DataFrame\\.iloc\\[:, 0\\] are different
+
+DataFrame\\.iloc\\[:, 0\\] values are different \\(100\\.0 %\\)
+\\[left\\]:  \\[á, à, ä\\]
+\\[right\\]: \\[a, a, a\\]"""
+
+        with tm.assert_raises_regex(AssertionError, expected):
+            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
+                                             'E': [u'é', u'è', u'ë']}),
+                               pd.DataFrame({'A': ['a', 'a', 'a'],
+                                             'E': ['e', 'e', 'e']}))
+
+        with tm.assert_raises_regex(AssertionError, expected):
+            assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'],
+                                             'E': [u'é', u'è', u'ë']}),
+                               pd.DataFrame({'A': ['a', 'a', 'a'],
+                                             'E': ['e', 'e', 'e']}),
+                               by_blocks=True)
+
 
 class TestAssertCategoricalEqual(object):