From 5af742a645b2841a4889748a18b1983ab4bbae42 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 25 Apr 2018 22:14:00 -0500
Subject: [PATCH 1/2] TST: Fixed failures in JSON asserts

Fixes an occasional failure in the JSON extension tests. They'd fail when
the Series held UserDicts that were all the same length.

```pytb
pandas/tests/extension/json/test_json.py:114:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1224: in assert_series_equal
    _testing.assert_almost_equal(left.get_values(), right.get_values(),
pandas/core/series.py:466: in get_values
    return self._data.get_values()
pandas/core/internals.py:4742: in get_values
    return np.array(self._block.to_dense(), copy=False)
pandas/core/internals.py:1940: in to_dense
    return np.asarray(self.values)
../../Envs/pandas-dev/lib/python3.6/site-packages/numpy/numpy/core/numeric.py:500: in asarray
    return array(a, dtype, copy=False, order=order)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = {'a': 1}, key = 0

    def __getitem__(self, key):
        if key in self.data:
            return self.data[key]
        if hasattr(self.__class__, "__missing__"):
            return self.__class__.__missing__(self, key)
>       raise KeyError(key)
E       KeyError: 0
```

Work around that by converting the UserDicts to plain dicts before comparing.
---
 pandas/tests/extension/json/array.py     |  6 ++
 pandas/tests/extension/json/test_json.py | 72 ++++++++++++++++++++----
 2 files changed, 67 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index 95f868e89ac39..2e75bb3b8c326 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -105,6 +105,12 @@ def take(self, indexer, allow_fill=True, fill_value=None):
     def copy(self, deep=False):
         return type(self)(self.data[:])
 
+    def astype(self, dtype, copy=True):
+        # NumPy has issues when all the dicts are the same length:
+        # np.array([UserDict(...), UserDict(...)]) fails, whereas
+        # np.array([{...}, {...}]) works, so cast each element to dict.
+        return np.array([dict(x) for x in self], dtype=dtype, copy=copy)
+
     def unique(self):
         # Parent method doesn't work since np.array will try to infer
         # a 2-dim object.
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index dcf08440738e7..537db09bb17b6 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -1,8 +1,10 @@
 import operator
+from collections import UserDict
 
 import pytest
 
-
+import pandas as pd
+import pandas.util.testing as tm
 from pandas.compat import PY2, PY36
 from pandas.tests.extension import base
 
@@ -59,27 +61,75 @@ def data_for_grouping():
     ])
 
 
-class TestDtype(base.BaseDtypeTests):
+class BaseJSON(object):
+    # NumPy doesn't handle an array of equal-length UserDicts.
+    # The default assert_series_equal eventually does a
+    # Series.values, which raises. We work around it by
+    # converting the UserDicts to dicts.
+    def assert_series_equal(self, left, right, **kwargs):
+        if left.dtype.name == 'json':
+            assert left.dtype == right.dtype
+            left = pd.Series(JSONArray(left.values.astype(object)),
+                             index=left.index, name=left.name)
+            right = pd.Series(JSONArray(right.values.astype(object)),
+                              index=right.index, name=right.name)
+        tm.assert_series_equal(left, right, **kwargs)
+
+    def assert_frame_equal(self, left, right, *args, **kwargs):
+        tm.assert_index_equal(
+            left.columns, right.columns,
+            exact=kwargs.get('check_column_type', 'equiv'),
+            check_names=kwargs.get('check_names', True),
+            check_exact=kwargs.get('check_exact', False),
+            check_categorical=kwargs.get('check_categorical', True),
+            obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))
+
+        # Pick out only the json columns; the rest are compared by tm below.
+        jsons = left.dtypes[left.dtypes == 'json'].index
+        for col in jsons:
+            self.assert_series_equal(left[col], right[col],
+                                     *args, **kwargs)
+
+        left = left.drop(columns=jsons)
+        right = right.drop(columns=jsons)
+        tm.assert_frame_equal(left, right, *args, **kwargs)
+
+
+class TestDtype(BaseJSON, base.BaseDtypeTests):
     pass
 
 
-class TestInterface(base.BaseInterfaceTests):
-    pass
+class TestInterface(BaseJSON, base.BaseInterfaceTests):
+    def test_custom_asserts(self):
+        # This would always trigger the KeyError from trying to put
+        # an array of equal-length UserDicts inside an ndarray.
+        data = JSONArray([UserDict({'a': 1}), UserDict({'b': 2}),
+                          UserDict({'c': 3})])
+        a = pd.Series(data)
+        self.assert_series_equal(a, a)
+        self.assert_frame_equal(a.to_frame(), a.to_frame())
+
+        b = pd.Series(data.take([0, 0, 1]))
+        with pytest.raises(AssertionError):
+            self.assert_series_equal(a, b)
+
+        with pytest.raises(AssertionError):
+            self.assert_frame_equal(a.to_frame(), b.to_frame())
 
 
-class TestConstructors(base.BaseConstructorsTests):
+class TestConstructors(BaseJSON, base.BaseConstructorsTests):
     pass
 
 
-class TestReshaping(base.BaseReshapingTests):
+class TestReshaping(BaseJSON, base.BaseReshapingTests):
     pass
 
 
-class TestGetitem(base.BaseGetitemTests):
+class TestGetitem(BaseJSON, base.BaseGetitemTests):
     pass
 
 
-class TestMissing(base.BaseMissingTests):
+class TestMissing(BaseJSON, base.BaseMissingTests):
     @pytest.mark.xfail(reason="Setting a dict as a scalar")
     def test_fillna_series(self):
         """We treat dictionaries as a mapping in fillna, not a scalar."""
@@ -94,7 +144,7 @@ def test_fillna_frame(self):
                               reason="Dictionary order unstable")
 
 
-class TestMethods(base.BaseMethodsTests):
+class TestMethods(BaseJSON, base.BaseMethodsTests):
     @unhashable
     def test_value_counts(self, all_data, dropna):
         pass
@@ -126,7 +176,7 @@ def test_sort_values_missing(self, data_missing_for_sorting, ascending):
             data_missing_for_sorting, ascending)
 
 
-class TestCasting(base.BaseCastingTests):
+class TestCasting(BaseJSON, base.BaseCastingTests):
     @pytest.mark.xfail
     def test_astype_str(self):
         """This currently fails in NumPy on np.array(self, dtype=str) with
@@ -139,7 +189,7 @@ def test_astype_str(self):
 # internals has trouble setting sequences of values into scalar positions.
 
 
-class TestGroupby(base.BaseGroupbyTests):
+class TestGroupby(BaseJSON, base.BaseGroupbyTests):
 
     @unhashable
     def test_groupby_extension_transform(self):

From e774eec76a4eb04e1ec419a9cf40dd6386a74012 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 26 Apr 2018 06:03:18 -0500
Subject: [PATCH 2/2] Py2 compat

---
 pandas/tests/extension/json/test_json.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 537db09bb17b6..0ef34c3b0f679 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -1,5 +1,5 @@
 import operator
-from collections import UserDict
+import collections
 
 import pytest
 
@@ -103,8 +103,9 @@ class TestInterface(BaseJSON, base.BaseInterfaceTests):
     def test_custom_asserts(self):
         # This would always trigger the KeyError from trying to put
         # an array of equal-length UserDicts inside an ndarray.
-        data = JSONArray([UserDict({'a': 1}), UserDict({'b': 2}),
-                          UserDict({'c': 3})])
+        data = JSONArray([collections.UserDict({'a': 1}),
+                          collections.UserDict({'b': 2}),
+                          collections.UserDict({'c': 3})])
         a = pd.Series(data)
         self.assert_series_equal(a, a)
         self.assert_frame_equal(a.to_frame(), a.to_frame())