From 3519667670914b404c274056dca9cbf3455aa3c8 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Fri, 13 Apr 2018 17:12:53 -0400 Subject: [PATCH 1/2] BUG: Series.is_unique has extraneous output if containing objects with __ne__ defined --- doc/source/whatsnew/v0.23.0.txt | 2 ++ pandas/_libs/hashtable_class_helper.pxi.in | 2 +- pandas/tests/series/test_analytics.py | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 107ce7855a00d..b89791a2ef2d5 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1048,3 +1048,5 @@ Other - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) - Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existant option key in some cases (:issue:`19789`) - :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) +- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) + \ No newline at end of file diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index bca4e388f3279..1e5d814d2b3b9 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -855,7 +855,7 @@ cdef class PyObjectHashTable(HashTable): for i in range(n): val = values[i] hash(val) - if not _checknan(val): + if not checknull(val): k = kh_get_pymap(self.table, val) if k == self.table.n_buckets: kh_put_pymap(self.table, val, &ret) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 0e6e44e839464..5d26d088e0e7f 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1548,6 +1548,22 @@ def test_is_unique(self): s = Series(np.arange(1000)) assert s.is_unique + def test_is_unique_class_ne(self, capsys): + # GH 20661 + class Foo(object): + def __init__(self, val): + self._value = val + + def __ne__(self, other): + raise Exception("NEQ not supported") + + li = [Foo(i) for i in range(5)] + s = pd.Series(li, index=[i for i in range(5)]) + _, err = capsys.readouterr() + s.is_unique + _, err = capsys.readouterr() + assert len(err) == 0 + def test_is_monotonic(self): s = Series(np.random.randint(0, 10, size=1000)) From ee71a901c0236fbb90050a901aac1f965f2e5a72 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Sat, 14 Apr 2018 13:36:15 -0400 Subject: [PATCH 2/2] remove checknan and update whatsnew --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/_libs/hashtable.pyx | 1 - pandas/_libs/src/util.pxd | 3 --- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index f08e136a8d7e0..32166db08f8ad 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1068,6 +1068,7 @@ Indexing - Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). - Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`) +- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) MultiIndex ^^^^^^^^^^ @@ -1175,4 +1176,3 @@ Other - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) - Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existant option key in some cases (:issue:`19789`) - Bug in :func:`assert_series_equal` and :func:`assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`) -- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 15d93374da3a9..b9a72a0c8285f 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -39,7 +39,6 @@ from khash cimport ( kh_put_pymap, kh_resize_pymap) -from util cimport _checknan cimport util from missing cimport checknull diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd index 5030b742849f8..d8249ec130f4d 100644 --- a/pandas/_libs/src/util.pxd +++ b/pandas/_libs/src/util.pxd @@ -159,8 +159,5 @@ cdef inline bint _checknull(object val): except ValueError: return False -cdef inline bint _checknan(object val): - return not cnp.PyArray_Check(val) and val != val - cdef inline bint is_period_object(object val): return getattr(val, '_typ', '_typ') == 'period'