From 5af742a645b2841a4889748a18b1983ab4bbae42 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Wed, 25 Apr 2018 22:14:00 -0500
Subject: [PATCH 1/2] TST: Fixed failures in JSON asserts

Fixes an occasional failure in the json tests. They'd fail when the
Series held UserDict objects of equal length.

```pytb
pandas/tests/extension/json/test_json.py:114:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1224: in assert_series_equal
    _testing.assert_almost_equal(left.get_values(), right.get_values(),
pandas/core/series.py:466: in get_values
    return self._data.get_values()
pandas/core/internals.py:4742: in get_values
    return np.array(self._block.to_dense(), copy=False)
pandas/core/internals.py:1940: in to_dense
    return np.asarray(self.values)
../../Envs/pandas-dev/lib/python3.6/site-packages/numpy/numpy/core/numeric.py:500: in asarray
    return array(a, dtype, copy=False, order=order)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = {'a': 1}, key = 0

    def __getitem__(self, key):
        if key in self.data:
            return self.data[key]
        if hasattr(self.__class__, "__missing__"):
            return self.__class__.__missing__(self, key)
>       raise KeyError(key)
E       KeyError: 0
```

Work around that by converting the UserDicts to plain dicts before
comparing.
--- pandas/tests/extension/json/array.py | 6 ++ pandas/tests/extension/json/test_json.py | 72 ++++++++++++++++++++---- 2 files changed, 67 insertions(+), 11 deletions(-) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 95f868e89ac39..2e75bb3b8c326 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -105,6 +105,12 @@ def take(self, indexer, allow_fill=True, fill_value=None): def copy(self, deep=False): return type(self)(self.data[:]) + def astype(self, dtype, copy=True): + # NumPy has issues when all the dicts are the same length. + # np.array([UserDict(...), UserDict(...)]) fails, + # but np.array([{...}, {...}]) works, so cast. + return np.array([dict(x) for x in self], dtype=dtype, copy=copy) + def unique(self): # Parent method doesn't work since np.array will try to infer # a 2-dim object. diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index dcf08440738e7..537db09bb17b6 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -1,8 +1,10 @@ import operator +from collections import UserDict import pytest - +import pandas as pd +import pandas.util.testing as tm from pandas.compat import PY2, PY36 from pandas.tests.extension import base @@ -59,27 +61,75 @@ def data_for_grouping(): ]) -class TestDtype(base.BaseDtypeTests): +class BaseJSON(object): + # NumPy doesn't handle an array of equal-length UserDicts. + # The default assert_series_equal eventually does a + # Series.values, which raises. We work around it by + # converting the UserDicts to dicts. 
+ def assert_series_equal(self, left, right, **kwargs): + if left.dtype.name == 'json': + assert left.dtype == right.dtype + left = pd.Series(JSONArray(left.values.astype(object)), + index=left.index, name=left.name) + right = pd.Series(JSONArray(right.values.astype(object)), + index=right.index, name=right.name) + tm.assert_series_equal(left, right, **kwargs) + + def assert_frame_equal(self, left, right, *args, **kwargs): + tm.assert_index_equal( + left.columns, right.columns, + exact=kwargs.get('check_column_type', 'equiv'), + check_names=kwargs.get('check_names', True), + check_exact=kwargs.get('check_exact', False), + check_categorical=kwargs.get('check_categorical', True), + obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame'))) + + jsons = (left.dtypes == 'json').index + + for col in jsons: + self.assert_series_equal(left[col], right[col], + *args, **kwargs) + + left = left.drop(columns=jsons) + right = right.drop(columns=jsons) + tm.assert_frame_equal(left, right, *args, **kwargs) + + +class TestDtype(BaseJSON, base.BaseDtypeTests): pass -class TestInterface(base.BaseInterfaceTests): - pass +class TestInterface(BaseJSON, base.BaseInterfaceTests): + def test_custom_asserts(self): + # This would always trigger the KeyError from trying to put + # an array of equal-length UserDicts inside an ndarray. 
+ data = JSONArray([UserDict({'a': 1}), UserDict({'b': 2}), + UserDict({'c': 3})]) + a = pd.Series(data) + self.assert_series_equal(a, a) + self.assert_frame_equal(a.to_frame(), a.to_frame()) + + b = pd.Series(data.take([0, 0, 1])) + with pytest.raises(AssertionError): + self.assert_series_equal(a, b) + + with pytest.raises(AssertionError): + self.assert_frame_equal(a.to_frame(), b.to_frame()) -class TestConstructors(base.BaseConstructorsTests): +class TestConstructors(BaseJSON, base.BaseConstructorsTests): pass -class TestReshaping(base.BaseReshapingTests): +class TestReshaping(BaseJSON, base.BaseReshapingTests): pass -class TestGetitem(base.BaseGetitemTests): +class TestGetitem(BaseJSON, base.BaseGetitemTests): pass -class TestMissing(base.BaseMissingTests): +class TestMissing(BaseJSON, base.BaseMissingTests): @pytest.mark.xfail(reason="Setting a dict as a scalar") def test_fillna_series(self): """We treat dictionaries as a mapping in fillna, not a scalar.""" @@ -94,7 +144,7 @@ def test_fillna_frame(self): reason="Dictionary order unstable") -class TestMethods(base.BaseMethodsTests): +class TestMethods(BaseJSON, base.BaseMethodsTests): @unhashable def test_value_counts(self, all_data, dropna): pass @@ -126,7 +176,7 @@ def test_sort_values_missing(self, data_missing_for_sorting, ascending): data_missing_for_sorting, ascending) -class TestCasting(base.BaseCastingTests): +class TestCasting(BaseJSON, base.BaseCastingTests): @pytest.mark.xfail def test_astype_str(self): """This currently fails in NumPy on np.array(self, dtype=str) with @@ -139,7 +189,7 @@ def test_astype_str(self): # internals has trouble setting sequences of values into scalar positions. 
-class TestGroupby(base.BaseGroupbyTests): +class TestGroupby(BaseJSON, base.BaseGroupbyTests): @unhashable def test_groupby_extension_transform(self): From e774eec76a4eb04e1ec419a9cf40dd6386a74012 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Apr 2018 06:03:18 -0500 Subject: [PATCH 2/2] Py2 compat --- pandas/tests/extension/json/test_json.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 537db09bb17b6..0ef34c3b0f679 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -1,5 +1,5 @@ import operator -from collections import UserDict +import collections import pytest @@ -103,8 +103,9 @@ class TestInterface(BaseJSON, base.BaseInterfaceTests): def test_custom_asserts(self): # This would always trigger the KeyError from trying to put # an array of equal-length UserDicts inside an ndarray. - data = JSONArray([UserDict({'a': 1}), UserDict({'b': 2}), - UserDict({'c': 3})]) + data = JSONArray([collections.UserDict({'a': 1}), + collections.UserDict({'b': 2}), + collections.UserDict({'c': 3})]) a = pd.Series(data) self.assert_series_equal(a, a) self.assert_frame_equal(a.to_frame(), a.to_frame())