From aa672b9b8925806d9458b6ba1057de5710745aaa Mon Sep 17 00:00:00 2001 From: Egor Dranischnikow Date: Wed, 1 Aug 2018 17:12:47 +0200 Subject: [PATCH 1/6] BUG: ensuring that np.asarray() simple handles data as objects and doesn't try to do smart things (GH22160) --- pandas/core/algorithms.py | 2 +- pandas/tests/test_algos.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4bf62b021cddc..1757a73bf4473 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -134,7 +134,7 @@ def _ensure_data(values, dtype=None): return values, dtype, 'int64' # we have failed, return object - values = np.asarray(values) + values = np.asarray(values, dtype=np.object) return ensure_object(values), 'object', 'object' diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index f89c7545765c9..6d4d8d62b1f50 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -615,6 +615,24 @@ def test_categorical_from_codes(self): result = algos.isin(Sd, St) tm.assert_numpy_array_equal(expected, result) + def test_same_object_is_in(self): + # GH 22160 + # nan is special, because out of a is a doesn't follow a == a + comps = ['ss', np.nan] + values = [np.nan] + expected = np.array([False, True]) + result = algos.isin(comps, values) + tm.assert_numpy_array_equal(expected, result) + + def test_no_cast(self): + # GH 22160 + # ensure 42 is not casted to string + comps = ['ss', 42] + values = ['42'] + expected = np.array([False, False]) + result = algos.isin(comps, values) + tm.assert_numpy_array_equal(expected, result) + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) def test_empty(self, empty): # see gh-16991 From eb2da2018bc56afc9fbc0377952fe11db4ec75b9 Mon Sep 17 00:00:00 2001 From: Egor Dranischnikow Date: Thu, 2 Aug 2018 12:10:19 +0200 Subject: [PATCH 2/6] reworking test cases --- pandas/tests/test_algos.py | 34 
++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 6d4d8d62b1f50..f29b172265001 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -617,16 +617,42 @@ def test_categorical_from_codes(self): def test_same_object_is_in(self): # GH 22160 - # nan is special, because out of a is a doesn't follow a == a - comps = ['ss', np.nan] + # nan is special, because from " a is b" doesn't follow "a == b" + # casting to -> np.float64 -> float-object will results in another nan-object + comps = [np.nan] # could be casted to float64 values = [np.nan] - expected = np.array([False, True]) + expected = np.array([True]) result = algos.isin(comps, values) tm.assert_numpy_array_equal(expected, result) + def test_different_nans(self): + # GH 22160 + # the current behavior is: + # * list, array of objects: isin() is False for different nan-objects + # * array of float64s: isin() is True for all nans + # this behavior might be changed in the future + # + # this test case only ensures it doesn't happen accidentally + # + comps = [float('nan')] + values = [float('nan')] + assert comps[0] is not values[0] # different nan-objects + + # as list of python-objects: + result = algos.isin(comps, values) + tm.assert_numpy_array_equal(np.array([False]), result) + + # as object-array: + result = algos.isin(np.asarray(comps, dtype=np.object), np.asarray(values, dtype=np.object)) + tm.assert_numpy_array_equal(np.array([False]), result) + + #as float64-array: + result = algos.isin(np.asarray(comps, dtype=np.float64), np.asarray(values, dtype=np.float64)) + tm.assert_numpy_array_equal(np.array([True]), result) + def test_no_cast(self): # GH 22160 - # ensure 42 is not casted to string + # ensure 42 is not casted to a string comps = ['ss', 42] values = ['42'] expected = np.array([False, False]) From 0aa9b53651be5729cb13b3bf2d1c82b0d704791e Mon Sep 17 00:00:00 2001 From: Egor 
Dranischnikow Date: Thu, 2 Aug 2018 12:17:16 +0200 Subject: [PATCH 3/6] add entry in whatsnew --- doc/source/whatsnew/v0.24.0.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f26d3d76592d0..c959f73f7e513 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -685,6 +685,5 @@ Other - :meth: `~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`) - Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`) - :meth: `~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`) -- -- +- :meth:`pandas.core.algorithms.isin` avoids spurious casting for lists (:issue:`22160`) - From 5c0ecc112cf72d5cc579a87e70c882c3bf2e6ed9 Mon Sep 17 00:00:00 2001 From: Egor Dranischnikow Date: Fri, 3 Aug 2018 18:18:10 +0200 Subject: [PATCH 4/6] adding yet another testcase and fixing pep8-problems --- pandas/tests/test_algos.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index f29b172265001..5a47239f8c6b2 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -615,16 +615,37 @@ def test_categorical_from_codes(self): result = algos.isin(Sd, St) tm.assert_numpy_array_equal(expected, result) - def test_same_object_is_in(self): + def test_same_nan_is_in(self): # GH 22160 # nan is special, because from " a is b" doesn't follow "a == b" - # casting to -> np.float64 -> float-object will results in another nan-object + # at least, isin() 
should follow python's "np.nan in [nan] == True" + # casting to -> np.float64 -> another float-object somewhere on + # the way could jeopardize this behavior comps = [np.nan] # could be casted to float64 values = [np.nan] expected = np.array([True]) result = algos.isin(comps, values) tm.assert_numpy_array_equal(expected, result) + def test_same_object_is_in(self): + # GH 22160 + # there could be special treatment for nans + # the user however could define a custom class + # with similar behavior, then we at least should + # fall back to usual python's behavior: "a in [a] == True" + class LikeNan(object): + def __eq__(self, other): + return False + + def __hash__(self): + return 0 + + a, b = LikeNan(), LikeNan() + # same object -> True + tm.assert_numpy_array_equal(algos.isin([a], [a]), np.array([True])) + # different objects -> False + tm.assert_numpy_array_equal(algos.isin([a], [b]), np.array([False])) + def test_different_nans(self): # GH 22160 # the current behavior is: @@ -634,7 +655,7 @@ def test_different_nans(self): # # this test case only ensures it doesn't happen accidentally # - comps = [float('nan')] + comps = [float('nan')] values = [float('nan')] assert comps[0] is not values[0] # different nan-objects @@ -643,11 +664,13 @@ def test_different_nans(self): tm.assert_numpy_array_equal(np.array([False]), result) # as object-array: - result = algos.isin(np.asarray(comps, dtype=np.object), np.asarray(values, dtype=np.object)) + result = algos.isin(np.asarray(comps, dtype=np.object), + np.asarray(values, dtype=np.object)) tm.assert_numpy_array_equal(np.array([False]), result) - #as float64-array: - result = algos.isin(np.asarray(comps, dtype=np.float64), np.asarray(values, dtype=np.float64)) + # as float64-array: + result = algos.isin(np.asarray(comps, dtype=np.float64), + np.asarray(values, dtype=np.float64)) tm.assert_numpy_array_equal(np.array([True]), result) def test_no_cast(self): From f11b14a67ab21ea765a6b406e0c173b162e355f2 Mon Sep 17 00:00:00 2001 From: 
Egor Dranischnikow Date: Thu, 9 Aug 2018 22:05:04 +0200 Subject: [PATCH 5/6] #22207 changed the behavior with different nan-objecst, thus adjusting the test cases --- pandas/tests/test_algos.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 5a47239f8c6b2..f118ee3ae0ed1 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -648,25 +648,20 @@ def __hash__(self): def test_different_nans(self): # GH 22160 - # the current behavior is: - # * list, array of objects: isin() is False for different nan-objects - # * array of float64s: isin() is True for all nans - # this behavior might be changed in the future - # - # this test case only ensures it doesn't happen accidentally - # + # all nans are handled as equivalent + comps = [float('nan')] values = [float('nan')] assert comps[0] is not values[0] # different nan-objects # as list of python-objects: result = algos.isin(comps, values) - tm.assert_numpy_array_equal(np.array([False]), result) + tm.assert_numpy_array_equal(np.array([True]), result) # as object-array: result = algos.isin(np.asarray(comps, dtype=np.object), np.asarray(values, dtype=np.object)) - tm.assert_numpy_array_equal(np.array([False]), result) + tm.assert_numpy_array_equal(np.array([True]), result) # as float64-array: result = algos.isin(np.asarray(comps, dtype=np.float64), From ab06c38fc23614275f3a6c317508fb27715db203 Mon Sep 17 00:00:00 2001 From: Egor Dranischnikow Date: Fri, 10 Aug 2018 00:40:51 +0200 Subject: [PATCH 6/6] no whatsnew needed --- doc/source/whatsnew/v0.24.0.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index c959f73f7e513..f26d3d76592d0 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -685,5 +685,6 @@ Other - :meth: `~pandas.io.formats.style.Styler.background_gradient` now takes a 
``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`) - Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`) - :meth: `~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`) -- :meth:`pandas.core.algorithms.isin` avoids spurious casting for lists (:issue:`22160`) +- +- -