pandas-dev · sinhrks · May 4, 2016
diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py
@@ -607,7 +607,7 @@ def test_unary_in_array(self):
                     '-37, 37, ~37, +37]'),
             np.array([-True, True, ~True, +True,
                       -False, False, ~False, +False,
-                      -37, 37, ~37, +37]))
+                      -37, 37, ~37, +37], dtype=np.object_))
 
     def test_disallow_scalar_bool_ops(self):
         exprs = '1 or 2', '1 and 2'

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -307,7 +307,8 @@ def array_equivalent(left, right, strict_nan=False):
         return False
 
     # Object arrays can contain None, NaN and NaT.
-    if is_object_dtype(left) or is_object_dtype(right):
+    # string dtypes must come through this path for NumPy 1.7.1 compat
+    if is_string_dtype(left) or is_string_dtype(right):
 
         if not strict_nan:
             # pd.isnull considers NaN and None to be equivalent.

diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py
@@ -139,7 +139,7 @@ def test_frame_from_json_to_json(self):
         def _check_orient(df, orient, dtype=None, numpy=False,
                           convert_axes=True, check_dtype=True, raise_ok=None,
                           sort=None, check_index_type=True,
-                          check_column_type=True):
+                          check_column_type=True, check_numpy_dtype=False):
             if sort is not None:
                 df = df.sort_values(sort)
             else:
@@ -181,22 +181,25 @@ def _check_orient(df, orient, dtype=None, numpy=False,
                     unser.index.values.astype('i8') * 1e6)
             if orient == "records":
                 # index is not captured in this orientation
-                assert_almost_equal(df.values, unser.values)
+                assert_almost_equal(df.values, unser.values,
+                                    check_dtype=check_numpy_dtype)
                 self.assertTrue(df.columns.equals(unser.columns))
             elif orient == "values":
                 # index and cols are not captured in this orientation
                 if numpy is True and df.shape == (0, 0):
                     assert unser.shape[0] == 0
                 else:
-                    assert_almost_equal(df.values, unser.values)
+                    assert_almost_equal(df.values, unser.values,
+                                        check_dtype=check_numpy_dtype)
             elif orient == "split":
                 # index and col labels might not be strings
                 unser.index = [str(i) for i in unser.index]
                 unser.columns = [str(i) for i in unser.columns]
 
                 if sort is None:
                     unser = unser.sort_index()
-                assert_almost_equal(df.values, unser.values)
+                assert_almost_equal(df.values, unser.values,
+                                    check_dtype=check_numpy_dtype)
             else:
                 if convert_axes:
                     assert_frame_equal(df, unser, check_dtype=check_dtype,

diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py
@@ -272,7 +272,8 @@ def test_constructor_bool(self):
         self.assertEqual(arr.dtype, bool)
         tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
         tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
-        tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3]))
+        tm.assert_numpy_array_equal(arr.sp_index.indices,
+                                    np.array([2, 3], np.int32))
 
         for dense in [arr.to_dense(), arr.values]:
             self.assertEqual(dense.dtype, bool)
@@ -297,9 +298,11 @@ def test_constructor_float32(self):
         arr = SparseArray(data, dtype=np.float32)
 
         self.assertEqual(arr.dtype, np.float32)
-        tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3]))
+        tm.assert_numpy_array_equal(arr.sp_values,
+                                    np.array([1, 3], dtype=np.float32))
         tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
-        tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([0, 2]))
+        tm.assert_numpy_array_equal(arr.sp_index.indices,
+                                    np.array([0, 2], dtype=np.int32))
 
         for dense in [arr.to_dense(), arr.values]:
             self.assertEqual(dense.dtype, np.float32)
@@ -516,7 +519,7 @@ def test_fillna_overlap(self):
         # filling with existing value doesn't replace existing value with
         # fill_value, i.e. existing 3 remains in sp_values
         res = s.fillna(3)
-        exp = np.array([1, 3, 3, 3, 3])
+        exp = np.array([1, 3, 3, 3, 3], dtype=np.float64)
         tm.assert_numpy_array_equal(res.to_dense(), exp)
 
         s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)

diff --git a/pandas/src/testing.pyx b/pandas/src/testing.pyx
@@ -1,7 +1,7 @@
 import numpy as np
 
 from pandas import compat
-from pandas.core.common import isnull, array_equivalent
+from pandas.core.common import isnull, array_equivalent, is_dtype_equal
 
 cdef NUMERIC_TYPES = (
     bool,
@@ -55,7 +55,7 @@ cpdef assert_dict_equal(a, b, bint compare_keys=True):
 
     return True
 
-cpdef assert_almost_equal(a, b, bint check_less_precise=False,
+cpdef assert_almost_equal(a, b, bint check_less_precise=False, check_dtype=True,
                           obj=None, lobj=None, robj=None):
     """Check that left and right objects are almost equal.
 
@@ -66,6 +66,8 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False,
     check_less_precise : bool, default False
         Specify comparison precision.
         5 digits (False) or 3 digits (True) after decimal points are compared.
+    check_dtype : bool, default True
+        Check dtype if both a and b are np.ndarray.
     obj : str, default None
         Specify object name being compared, internally used to show appropriate
         assertion message
@@ -82,7 +84,7 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False,
         double diff = 0.0
         Py_ssize_t i, na, nb
         double fa, fb
-        bint is_unequal = False
+        bint is_unequal = False, a_is_ndarray, b_is_ndarray
 
     if lobj is None:
         lobj = a
@@ -97,36 +99,43 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False,
         assert a == b, "%r != %r" % (a, b)
         return True
 
+    a_is_ndarray = isinstance(a, np.ndarray)
+    b_is_ndarray = isinstance(b, np.ndarray)
+
+    if obj is None:
+        if a_is_ndarray or b_is_ndarray:
+            obj = 'numpy array'
+        else:
+            obj = 'Iterable'
+
     if isiterable(a):
 
         if not isiterable(b):
-            from pandas.util.testing import raise_assert_detail
-            if obj is None:
-                obj = 'Iterable'
-            msg = "First object is iterable, second isn't"
-            raise_assert_detail(obj, msg, a, b)
+            from pandas.util.testing import assert_class_equal
+            # classes can't be the same here, so this raises the error
+            assert_class_equal(a, b, obj=obj)
 
         assert has_length(a) and has_length(b), (
             "Can't compare objects without length, one or both is invalid: "
-            "(%r, %r)" % (a, b)
-        )
+            "(%r, %r)" % (a, b))
 
-        if isinstance(a, np.ndarray) and isinstance(b, np.ndarray):
-            if obj is None:
-                obj = 'numpy array'
+        if a_is_ndarray and b_is_ndarray:
             na, nb = a.size, b.size
             if a.shape != b.shape:
                 from pandas.util.testing import raise_assert_detail
                 raise_assert_detail(obj, '{0} shapes are different'.format(obj),
                                     a.shape, b.shape)
+
+            if check_dtype and not is_dtype_equal(a, b):
+                from pandas.util.testing import assert_attr_equal
+                assert_attr_equal('dtype', a, b, obj=obj)
+
             try:
                 if array_equivalent(a, b, strict_nan=True):
                     return True
             except:
                 pass
         else:
-            if obj is None:
-                obj = 'Iterable'
             na, nb = len(a), len(b)
 
         if na != nb:
@@ -149,54 +158,38 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False,
         return True
 
     elif isiterable(b):
-        from pandas.util.testing import raise_assert_detail
-        if obj is None:
-            obj = 'Iterable'
-        msg = "Second object is iterable, first isn't"
-        raise_assert_detail(obj, msg, a, b)
+        from pandas.util.testing import assert_class_equal
+        # classes can't be the same here, so this raises the error
+        assert_class_equal(a, b, obj=obj)
 
-    if isnull(a):
-        assert isnull(b), (
-            "First object is null, second isn't: %r != %r" % (a, b)
-        )
+    if a == b:
+        # object comparison
         return True
-    elif isnull(b):
-        assert isnull(a), (
-            "First object is not null, second is null: %r != %r" % (a, b)
-        )
+    if isnull(a) and isnull(b):
+        # nan / None comparison
         return True
-
-    if is_comparable_as_number(a):
-        assert is_comparable_as_number(b), (
-            "First object is numeric, second is not: %r != %r" % (a, b)
-        )
+    if is_comparable_as_number(a) and is_comparable_as_number(b):
+        if array_equivalent(a, b, strict_nan=True):
+            # inf comparison
+            return True
 
         decimal = 5
 
         # deal with differing dtypes
         if check_less_precise:
             decimal = 3
 
-        if np.isinf(a):
-            assert np.isinf(b), "First object is inf, second isn't"
-            if np.isposinf(a):
-                assert np.isposinf(b), "First object is positive inf, second is negative inf"
-            else:
-                assert np.isneginf(b), "First object is negative inf, second is positive inf"
+        fa, fb = a, b
+
+        # case for zero
+        if abs(fa) < 1e-5:
+            if not decimal_almost_equal(fa, fb, decimal):
+                assert False, (
+                    '(very low values) expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal)
+                )
         else:
-            fa, fb = a, b
-
-            # case for zero
-            if abs(fa) < 1e-5:
-                if not decimal_almost_equal(fa, fb, decimal):
-                    assert False, (
-                        '(very low values) expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal)
-                    )
-            else:
-                if not decimal_almost_equal(1, fb / fa, decimal):
-                    assert False, 'expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal)
-
-    else:
-        assert a == b, "%r != %r" % (a, b)
+            if not decimal_almost_equal(1, fb / fa, decimal):
+                assert False, 'expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal)
+        return True
 
-    return True
+    raise AssertionError("{0} != {1}".format(a, b))
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
@@ -1430,17 +1430,18 @@ def test_setitem_frame(self):
 
         # already aligned
         f = self.mixed_frame.copy()
-        piece = DataFrame([[1, 2], [3, 4]], index=f.index[
-                          0:2], columns=['A', 'B'])
+        piece = DataFrame([[1., 2.], [3., 4.]],
+                          index=f.index[0:2], columns=['A', 'B'])
         key = (slice(None, 2), ['A', 'B'])
         f.ix[key] = piece
         assert_almost_equal(f.ix[0:2, ['A', 'B']].values,
                             piece.values)
 
         # rows unaligned
         f = self.mixed_frame.copy()
-        piece = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=list(
-            f.index[0:2]) + ['foo', 'bar'], columns=['A', 'B'])
+        piece = DataFrame([[1., 2.], [3., 4.], [5., 6.], [7., 8.]],
+                          index=list(f.index[0:2]) + ['foo', 'bar'],
+                          columns=['A', 'B'])
         key = (slice(None, 2), ['A', 'B'])
         f.ix[key] = piece
         assert_almost_equal(f.ix[0:2:, ['A', 'B']].values,

diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -274,7 +274,7 @@ def test_argsort(self):
 
             result = ind.argsort()
             expected = np.array(ind).argsort()
-            tm.assert_numpy_array_equal(result, expected)
+            tm.assert_numpy_array_equal(result, expected, check_dtype=False)
 
     def test_numpy_argsort(self):
         for k, ind in self.indices.items():

diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
@@ -216,13 +216,15 @@ def test_map(self):
                              ordered=False)
         tm.assert_categorical_equal(result, exp)
 
-        tm.assert_numpy_array_equal(ci.map(lambda x: 1), np.array([1] * 5))
+        tm.assert_numpy_array_equal(ci.map(lambda x: 1),
+                                    np.array([1] * 5, dtype=np.int64))
 
         # change categories dtype
         ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'),
                                  ordered=False)
         def f(x):
             return {'A': 10, 'B': 20, 'C': 30}.get(x)
+
         result = ci.map(f)
         exp = pd.Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30],
                              ordered=False)
@@ -340,30 +342,35 @@ def test_reindexing(self):
             tm.assert_numpy_array_equal(expected, actual)
 
     def test_reindex_dtype(self):
-        res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(['a', 'c'
-                                                                       ])
+        c = CategoricalIndex(['a', 'b', 'c', 'a'])
+        res, indexer = c.reindex(['a', 'c'])
         tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
-
-        res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(
-            Categorical(['a', 'c']))
-        tm.assert_index_equal(res, CategoricalIndex(
-            ['a', 'a', 'c'], categories=['a', 'c']), exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
-
-        res, indexer = CategoricalIndex(
-            ['a', 'b', 'c', 'a'
-             ], categories=['a', 'b', 'c', 'd']).reindex(['a', 'c'])
-        tm.assert_index_equal(res, Index(
-            ['a', 'a', 'c'], dtype='object'), exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
-
-        res, indexer = CategoricalIndex(
-            ['a', 'b', 'c', 'a'],
-            categories=['a', 'b', 'c', 'd']).reindex(Categorical(['a', 'c']))
-        tm.assert_index_equal(res, CategoricalIndex(
-            ['a', 'a', 'c'], categories=['a', 'c']), exact=True)
-        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
+        tm.assert_numpy_array_equal(indexer,
+                                    np.array([0, 3, 2], dtype=np.int64))
+
+        c = CategoricalIndex(['a', 'b', 'c', 'a'])
+        res, indexer = c.reindex(Categorical(['a', 'c']))
+
+        exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
+        tm.assert_index_equal(res, exp, exact=True)
+        tm.assert_numpy_array_equal(indexer,
+                                    np.array([0, 3, 2], dtype=np.int64))
+
+        c = CategoricalIndex(['a', 'b', 'c', 'a'],
+                             categories=['a', 'b', 'c', 'd'])
+        res, indexer = c.reindex(['a', 'c'])
+        exp = Index(['a', 'a', 'c'], dtype='object')
+        tm.assert_index_equal(res, exp, exact=True)
+        tm.assert_numpy_array_equal(indexer,
+                                    np.array([0, 3, 2], dtype=np.int64))
+
+        c = CategoricalIndex(['a', 'b', 'c', 'a'],
+                             categories=['a', 'b', 'c', 'd'])
+        res, indexer = c.reindex(Categorical(['a', 'c']))
+        exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
+        tm.assert_index_equal(res, exp, exact=True)
+        tm.assert_numpy_array_equal(indexer,
+                                    np.array([0, 3, 2], dtype=np.int64))
 
     def test_duplicates(self):