Skip to content

Commit cc3ab4a

Browse files
realead authored and jreback committed
BUG: ensuring that np.asarray() simply handles data as objects and doesn't… (#22161)
1 parent c7d6264 commit cc3ab4a

File tree

2 files changed

+63
-1
lines changed

2 files changed

+63
-1
lines changed

pandas/core/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def _ensure_data(values, dtype=None):
134134
return values, dtype, 'int64'
135135

136136
# we have failed, return object
137-
values = np.asarray(values)
137+
values = np.asarray(values, dtype=np.object)
138138
return ensure_object(values), 'object', 'object'
139139

140140

pandas/tests/test_algos.py

+62
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,68 @@ def test_categorical_from_codes(self):
615615
result = algos.isin(Sd, St)
616616
tm.assert_numpy_array_equal(expected, result)
617617

618+
def test_same_nan_is_in(self):
619+
# GH 22160
620+
# nan is special, because "a is b" does not imply "a == b"
621+
# at least, isin() should follow python's "np.nan in [nan] == True"
622+
# casting to np.float64 and then to another float object somewhere along
623+
# the way could jeopardize this behavior
624+
comps = [np.nan] # could be cast to float64
625+
values = [np.nan]
626+
expected = np.array([True])
627+
result = algos.isin(comps, values)
628+
tm.assert_numpy_array_equal(expected, result)
629+
630+
def test_same_object_is_in(self):
631+
# GH 22160
632+
# there could be special treatment for nans;
633+
# the user, however, could define a custom class
634+
# with similar behavior, so we should at least
635+
# fall back to Python's usual behavior: "a in [a] == True"
636+
class LikeNan(object):
637+
def __eq__(self):
638+
return False
639+
640+
def __hash__(self):
641+
return 0
642+
643+
a, b = LikeNan(), LikeNan()
644+
# same object -> True
645+
tm.assert_numpy_array_equal(algos.isin([a], [a]), np.array([True]))
646+
# different objects -> False
647+
tm.assert_numpy_array_equal(algos.isin([a], [b]), np.array([False]))
648+
649+
def test_different_nans(self):
650+
# GH 22160
651+
# all nans are handled as equivalent
652+
653+
comps = [float('nan')]
654+
values = [float('nan')]
655+
assert comps[0] is not values[0] # different nan-objects
656+
657+
# as list of python-objects:
658+
result = algos.isin(comps, values)
659+
tm.assert_numpy_array_equal(np.array([True]), result)
660+
661+
# as object-array:
662+
result = algos.isin(np.asarray(comps, dtype=np.object),
663+
np.asarray(values, dtype=np.object))
664+
tm.assert_numpy_array_equal(np.array([True]), result)
665+
666+
# as float64-array:
667+
result = algos.isin(np.asarray(comps, dtype=np.float64),
668+
np.asarray(values, dtype=np.float64))
669+
tm.assert_numpy_array_equal(np.array([True]), result)
670+
671+
def test_no_cast(self):
672+
# GH 22160
673+
# ensure 42 is not cast to a string
674+
comps = ['ss', 42]
675+
values = ['42']
676+
expected = np.array([False, False])
677+
result = algos.isin(comps, values)
678+
tm.assert_numpy_array_equal(expected, result)
679+
618680
@pytest.mark.parametrize("empty", [[], Series(), np.array([])])
619681
def test_empty(self, empty):
620682
# see gh-16991

0 commit comments

Comments
 (0)