Skip to content

TST: Check ndarray dtype by default #13088

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/computation/tests/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ def test_unary_in_array(self):
'-37, 37, ~37, +37]'),
np.array([-True, True, ~True, +True,
-False, False, ~False, +False,
-37, 37, ~37, +37]))
-37, 37, ~37, +37], dtype=np.object_))

def test_disallow_scalar_bool_ops(self):
exprs = '1 or 2', '1 and 2'
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,8 @@ def array_equivalent(left, right, strict_nan=False):
return False

# Object arrays can contain None, NaN and NaT.
if is_object_dtype(left) or is_object_dtype(right):
# string dtypes must also take this path for NumPy 1.7.1 compat
if is_string_dtype(left) or is_string_dtype(right):

if not strict_nan:
# pd.isnull considers NaN and None to be equivalent.
Expand Down
11 changes: 7 additions & 4 deletions pandas/io/tests/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def test_frame_from_json_to_json(self):
def _check_orient(df, orient, dtype=None, numpy=False,
convert_axes=True, check_dtype=True, raise_ok=None,
sort=None, check_index_type=True,
check_column_type=True):
check_column_type=True, check_numpy_dtype=False):
if sort is not None:
df = df.sort_values(sort)
else:
Expand Down Expand Up @@ -181,22 +181,25 @@ def _check_orient(df, orient, dtype=None, numpy=False,
unser.index.values.astype('i8') * 1e6)
if orient == "records":
# index is not captured in this orientation
assert_almost_equal(df.values, unser.values)
assert_almost_equal(df.values, unser.values,
check_dtype=check_numpy_dtype)
self.assertTrue(df.columns.equals(unser.columns))
elif orient == "values":
# index and cols are not captured in this orientation
if numpy is True and df.shape == (0, 0):
assert unser.shape[0] == 0
else:
assert_almost_equal(df.values, unser.values)
assert_almost_equal(df.values, unser.values,
check_dtype=check_numpy_dtype)
elif orient == "split":
# index and col labels might not be strings
unser.index = [str(i) for i in unser.index]
unser.columns = [str(i) for i in unser.columns]

if sort is None:
unser = unser.sort_index()
assert_almost_equal(df.values, unser.values)
assert_almost_equal(df.values, unser.values,
check_dtype=check_numpy_dtype)
else:
if convert_axes:
assert_frame_equal(df, unser, check_dtype=check_dtype,
Expand Down
11 changes: 7 additions & 4 deletions pandas/sparse/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,8 @@ def test_constructor_bool(self):
self.assertEqual(arr.dtype, bool)
tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3]))
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([2, 3], np.int32))

for dense in [arr.to_dense(), arr.values]:
self.assertEqual(dense.dtype, bool)
Expand All @@ -297,9 +298,11 @@ def test_constructor_float32(self):
arr = SparseArray(data, dtype=np.float32)

self.assertEqual(arr.dtype, np.float32)
tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3]))
tm.assert_numpy_array_equal(arr.sp_values,
np.array([1, 3], dtype=np.float32))
tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([0, 2]))
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([0, 2], dtype=np.int32))

for dense in [arr.to_dense(), arr.values]:
self.assertEqual(dense.dtype, np.float32)
Expand Down Expand Up @@ -516,7 +519,7 @@ def test_fillna_overlap(self):
# filling with existing value doesn't replace existing value with
# fill_value, i.e. existing 3 remains in sp_values
res = s.fillna(3)
exp = np.array([1, 3, 3, 3, 3])
exp = np.array([1, 3, 3, 3, 3], dtype=np.float64)
tm.assert_numpy_array_equal(res.to_dense(), exp)

s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
Expand Down
101 changes: 47 additions & 54 deletions pandas/src/testing.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np

from pandas import compat
from pandas.core.common import isnull, array_equivalent
from pandas.core.common import isnull, array_equivalent, is_dtype_equal

cdef NUMERIC_TYPES = (
bool,
Expand Down Expand Up @@ -55,7 +55,7 @@ cpdef assert_dict_equal(a, b, bint compare_keys=True):

return True

cpdef assert_almost_equal(a, b, bint check_less_precise=False,
cpdef assert_almost_equal(a, b, bint check_less_precise=False, check_dtype=True,
obj=None, lobj=None, robj=None):
"""Check that left and right objects are almost equal.

Expand All @@ -66,6 +66,8 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False,
check_less_precise : bool, default False
Specify comparison precision.
5 digits (False) or 3 digits (True) after decimal points are compared.
check_dtype : bool, default True
Check dtype if both a and b are np.ndarray.
obj : str, default None
Specify object name being compared, internally used to show appropriate
assertion message
Expand All @@ -82,7 +84,7 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False,
double diff = 0.0
Py_ssize_t i, na, nb
double fa, fb
bint is_unequal = False
bint is_unequal = False, a_is_ndarray, b_is_ndarray

if lobj is None:
lobj = a
Expand All @@ -97,36 +99,43 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False,
assert a == b, "%r != %r" % (a, b)
return True

a_is_ndarray = isinstance(a, np.ndarray)
b_is_ndarray = isinstance(b, np.ndarray)

if obj is None:
if a_is_ndarray or b_is_ndarray:
obj = 'numpy array'
else:
obj = 'Iterable'

if isiterable(a):

if not isiterable(b):
from pandas.util.testing import raise_assert_detail
if obj is None:
obj = 'Iterable'
msg = "First object is iterable, second isn't"
raise_assert_detail(obj, msg, a, b)
from pandas.util.testing import assert_class_equal
# a is iterable but b is not, so their classes differ and this raises
assert_class_equal(a, b, obj=obj)

assert has_length(a) and has_length(b), (
"Can't compare objects without length, one or both is invalid: "
"(%r, %r)" % (a, b)
)
"(%r, %r)" % (a, b))

if isinstance(a, np.ndarray) and isinstance(b, np.ndarray):
if obj is None:
obj = 'numpy array'
if a_is_ndarray and b_is_ndarray:
na, nb = a.size, b.size
if a.shape != b.shape:
from pandas.util.testing import raise_assert_detail
raise_assert_detail(obj, '{0} shapes are different'.format(obj),
a.shape, b.shape)

if check_dtype and not is_dtype_equal(a, b):
from pandas.util.testing import assert_attr_equal
assert_attr_equal('dtype', a, b, obj=obj)

try:
if array_equivalent(a, b, strict_nan=True):
return True
except:
pass
else:
if obj is None:
obj = 'Iterable'
na, nb = len(a), len(b)

if na != nb:
Expand All @@ -149,54 +158,38 @@ cpdef assert_almost_equal(a, b, bint check_less_precise=False,
return True

elif isiterable(b):
from pandas.util.testing import raise_assert_detail
if obj is None:
obj = 'Iterable'
msg = "Second object is iterable, first isn't"
raise_assert_detail(obj, msg, a, b)
from pandas.util.testing import assert_class_equal
# b is iterable but a is not, so their classes differ and this raises
assert_class_equal(a, b, obj=obj)

if isnull(a):
assert isnull(b), (
"First object is null, second isn't: %r != %r" % (a, b)
)
if a == b:
# object comparison
return True
elif isnull(b):
assert isnull(a), (
"First object is not null, second is null: %r != %r" % (a, b)
)
if isnull(a) and isnull(b):
# nan / None comparison
return True

if is_comparable_as_number(a):
assert is_comparable_as_number(b), (
"First object is numeric, second is not: %r != %r" % (a, b)
)
if is_comparable_as_number(a) and is_comparable_as_number(b):
if array_equivalent(a, b, strict_nan=True):
# inf comparison
return True

decimal = 5

# deal with differing dtypes
if check_less_precise:
decimal = 3

if np.isinf(a):
assert np.isinf(b), "First object is inf, second isn't"
if np.isposinf(a):
assert np.isposinf(b), "First object is positive inf, second is negative inf"
else:
assert np.isneginf(b), "First object is negative inf, second is positive inf"
fa, fb = a, b

# case for zero
if abs(fa) < 1e-5:
if not decimal_almost_equal(fa, fb, decimal):
assert False, (
'(very low values) expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal)
)
else:
fa, fb = a, b

# case for zero
if abs(fa) < 1e-5:
if not decimal_almost_equal(fa, fb, decimal):
assert False, (
'(very low values) expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal)
)
else:
if not decimal_almost_equal(1, fb / fa, decimal):
assert False, 'expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal)

else:
assert a == b, "%r != %r" % (a, b)
if not decimal_almost_equal(1, fb / fa, decimal):
assert False, 'expected %.5f but got %.5f, with decimal %d' % (fb, fa, decimal)
return True

return True
raise AssertionError("{0} != {1}".format(a, b))
9 changes: 5 additions & 4 deletions pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1430,17 +1430,18 @@ def test_setitem_frame(self):

# already aligned
f = self.mixed_frame.copy()
piece = DataFrame([[1, 2], [3, 4]], index=f.index[
0:2], columns=['A', 'B'])
piece = DataFrame([[1., 2.], [3., 4.]],
index=f.index[0:2], columns=['A', 'B'])
key = (slice(None, 2), ['A', 'B'])
f.ix[key] = piece
assert_almost_equal(f.ix[0:2, ['A', 'B']].values,
piece.values)

# rows unaligned
f = self.mixed_frame.copy()
piece = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=list(
f.index[0:2]) + ['foo', 'bar'], columns=['A', 'B'])
piece = DataFrame([[1., 2.], [3., 4.], [5., 6.], [7., 8.]],
index=list(f.index[0:2]) + ['foo', 'bar'],
columns=['A', 'B'])
key = (slice(None, 2), ['A', 'B'])
f.ix[key] = piece
assert_almost_equal(f.ix[0:2:, ['A', 'B']].values,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def test_argsort(self):

result = ind.argsort()
expected = np.array(ind).argsort()
tm.assert_numpy_array_equal(result, expected)
tm.assert_numpy_array_equal(result, expected, check_dtype=False)

def test_numpy_argsort(self):
for k, ind in self.indices.items():
Expand Down
55 changes: 31 additions & 24 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,13 +216,15 @@ def test_map(self):
ordered=False)
tm.assert_categorical_equal(result, exp)

tm.assert_numpy_array_equal(ci.map(lambda x: 1), np.array([1] * 5))
tm.assert_numpy_array_equal(ci.map(lambda x: 1),
np.array([1] * 5, dtype=np.int64))

# change categories dtype
ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'),
ordered=False)
def f(x):
return {'A': 10, 'B': 20, 'C': 30}.get(x)

result = ci.map(f)
exp = pd.Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30],
ordered=False)
Expand Down Expand Up @@ -340,30 +342,35 @@ def test_reindexing(self):
tm.assert_numpy_array_equal(expected, actual)

def test_reindex_dtype(self):
res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(['a', 'c'
])
c = CategoricalIndex(['a', 'b', 'c', 'a'])
res, indexer = c.reindex(['a', 'c'])
tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))

res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(
Categorical(['a', 'c']))
tm.assert_index_equal(res, CategoricalIndex(
['a', 'a', 'c'], categories=['a', 'c']), exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))

res, indexer = CategoricalIndex(
['a', 'b', 'c', 'a'
], categories=['a', 'b', 'c', 'd']).reindex(['a', 'c'])
tm.assert_index_equal(res, Index(
['a', 'a', 'c'], dtype='object'), exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))

res, indexer = CategoricalIndex(
['a', 'b', 'c', 'a'],
categories=['a', 'b', 'c', 'd']).reindex(Categorical(['a', 'c']))
tm.assert_index_equal(res, CategoricalIndex(
['a', 'a', 'c'], categories=['a', 'c']), exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
tm.assert_numpy_array_equal(indexer,
np.array([0, 3, 2], dtype=np.int64))

c = CategoricalIndex(['a', 'b', 'c', 'a'])
res, indexer = c.reindex(Categorical(['a', 'c']))

exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
tm.assert_index_equal(res, exp, exact=True)
tm.assert_numpy_array_equal(indexer,
np.array([0, 3, 2], dtype=np.int64))

c = CategoricalIndex(['a', 'b', 'c', 'a'],
categories=['a', 'b', 'c', 'd'])
res, indexer = c.reindex(['a', 'c'])
exp = Index(['a', 'a', 'c'], dtype='object')
tm.assert_index_equal(res, exp, exact=True)
tm.assert_numpy_array_equal(indexer,
np.array([0, 3, 2], dtype=np.int64))

c = CategoricalIndex(['a', 'b', 'c', 'a'],
categories=['a', 'b', 'c', 'd'])
res, indexer = c.reindex(Categorical(['a', 'c']))
exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
tm.assert_index_equal(res, exp, exact=True)
tm.assert_numpy_array_equal(indexer,
np.array([0, 3, 2], dtype=np.int64))

def test_duplicates(self):

Expand Down
Loading