diff --git a/doc/source/whatsnew/v0.23.4.txt b/doc/source/whatsnew/v0.23.4.txt index a88c22e3d01f7..9c2ef7ecb601c 100644 --- a/doc/source/whatsnew/v0.23.4.txt +++ b/doc/source/whatsnew/v0.23.4.txt @@ -31,7 +31,7 @@ Bug Fixes **Conversion** -- +- Bug in :func:`isin` where floats were unintentionally cast to int, leading to incorrect comparison results (:issue:`21804`) - **Indexing** diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6e49e8044ff25..3833917f2e419 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -23,7 +23,7 @@ is_period_dtype, is_numeric_dtype, is_float_dtype, is_bool_dtype, needs_i8_conversion, - is_datetimetz, + is_datetimetz, is_datetime_or_timedelta_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_datetimelike, is_interval_dtype, is_scalar, is_list_like, @@ -39,6 +39,8 @@ from pandas.util._decorators import (Appender, Substitution, deprecate_kwarg) +from pandas._libs.tslibs.timestamps import Timestamp + _shared_docs = {} @@ -415,33 +417,40 @@ def isin(comps, values): comps = com._values_from_object(comps) comps, dtype, _ = _ensure_data(comps) - values, _, _ = _ensure_data(values, dtype=dtype) + + is_time_like = lambda x: (is_datetime_or_timedelta_dtype(x) + or isinstance(x, Timestamp)) + + is_int = lambda x: ((x == np.int64) or (x == int)) + is_float = lambda x: ((x == np.float64) or (x == float)) + + if is_time_like(dtype): + values, _, _ = _ensure_data(values, dtype=dtype) + else: + values, _, _ = _ensure_data(values) + + comps_types = set(type(v) for v in comps) + values_types = set(type(v) for v in values) # faster for larger cases to use np.in1d - f = lambda x, y: htable.ismember_object(x, values) + f = lambda x, y: htable.ismember_object(x.astype(object), y.astype(object)) # GH16012 # Ensure np.in1d doesn't get object types or it *may* throw an exception if len(comps) > 1000000 and not is_object_dtype(comps): f = lambda x, y: np.in1d(x, y) - elif is_integer_dtype(comps): - try: 
+ elif len(comps_types) == len(values_types) == 1: + comps_types = comps_types.pop() + values_types = values_types.pop() + if (is_int(comps_types) and is_int(values_types)): values = values.astype('int64', copy=False) comps = comps.astype('int64', copy=False) f = lambda x, y: htable.ismember_int64(x, y) - except (TypeError, ValueError): - values = values.astype(object) - comps = comps.astype(object) - - elif is_float_dtype(comps): - try: + elif (is_float(comps_types) and is_float(values_types)): values = values.astype('float64', copy=False) comps = comps.astype('float64', copy=False) checknull = isna(values).any() f = lambda x, y: htable.ismember_float64(x, y, checknull) - except (TypeError, ValueError): - values = values.astype(object) - comps = comps.astype(object) return f(comps, values) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 25e64aa82cc36..8ab907a9723bd 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -509,42 +509,23 @@ def test_invalid(self): pytest.raises(TypeError, lambda: algos.isin(1, [1])) pytest.raises(TypeError, lambda: algos.isin([1], 1)) - def test_basic(self): - - result = algos.isin([1, 2], [1]) - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = algos.isin(np.array([1, 2]), [1]) - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = algos.isin(Series([1, 2]), [1]) - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = algos.isin(Series([1, 2]), Series([1])) - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = algos.isin(Series([1, 2]), set([1])) - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = algos.isin(['a', 'b'], ['a']) - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = algos.isin(Series(['a', 'b']), Series(['a'])) 
- expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = algos.isin(Series(['a', 'b']), set(['a'])) - expected = np.array([True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = algos.isin(['a', 'b'], [1]) - expected = np.array([False, False]) + @pytest.mark.parametrize("comps,values,expected", [ + ([1, 2], [1], [True, False]), + ([1, 0], [1, 0.5], [True, False]), + ([1.0, 0], [1, 0.5], [True, False]), + ([1.0, 0.0], [1, 0], [True, True]), + (np.array([1, 2]), [1], [True, False]), + (Series([1, 2]), [1], [True, False]), + (Series([1, 2]), Series([1]), [True, False]), + (Series([1, 2]), set([1]), [True, False]), + (['a', 'b'], ['a'], [True, False]), + (Series(['a', 'b']), Series(['a']), [True, False]), + (Series(['a', 'b']), set(['a']), [True, False]), + (['a', 'b'], [1], [False, False]) + ]) + def test_basic(self, comps, values, expected): + result = algos.isin(comps, values) + expected = np.array(expected) tm.assert_numpy_array_equal(result, expected) def test_i8(self):