From 9fceb515f545f4f75a7f91aa388bca968a96631d Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 30 Aug 2017 08:23:57 +0200 Subject: [PATCH 1/3] BUG: Try to sort result of Index.union rather than guessing sortability closes #17376 --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/indexes/base.py | 28 ++++----------- pandas/tests/indexes/test_base.py | 51 +++++++++++---------------- pandas/tests/series/test_operators.py | 26 +++----------- 4 files changed, 33 insertions(+), 73 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 302f8043f3ba7..5a2a878f1e160 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -920,6 +920,7 @@ Numeric Indexing ^^^^^^^^ +- Bug in the order of the result of ``Index.union()`` when indexes contain tuples (:issue:`17376`) - Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`) - Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`) - Bug in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 69a07a91838e1..7e7f99031a877 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2406,35 +2406,21 @@ def union(self, other): value_set = set(lvals) result.extend([x for x in rvals if x not in value_set]) else: - indexer = self.get_indexer(other) - indexer, = (indexer == -1).nonzero() - + indexer = np.where(self.get_indexer(other) == -1)[0] if len(indexer) > 0: other_diff = algos.take_nd(rvals, indexer, allow_fill=False) result = _concat._concat_compat((lvals, other_diff)) - try: - lvals[0] < other_diff[0] - except TypeError as e: - warnings.warn("%s, sort order is undefined for " - "incomparable objects" % e, RuntimeWarning, - stacklevel=3) - else: - types = frozenset((self.inferred_type, - other.inferred_type)) - if not types & _unsortable_types: - result.sort() - else: result = lvals - try: - result = np.sort(result) - except TypeError as e: - warnings.warn("%s, sort order is undefined for " - "incomparable objects" % e, RuntimeWarning, - stacklevel=3) + try: + result = sorting.safe_sort(result) + except TypeError as e: + warnings.warn("%s, sort order is undefined for " + "incomparable objects" % e, RuntimeWarning, + stacklevel=3) # for subclasses return self._wrap_union_result(other, result) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 964a6b14d2b1e..5f3e43212620a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -784,8 +784,7 @@ def test_union(self): expected = Index(list('ab'), name='A') tm.assert_index_equal(union, expected) - with tm.assert_produces_warning(RuntimeWarning): - firstCat = self.strIndex.union(self.dateIndex) + firstCat = self.strIndex.union(self.dateIndex) secondCat = self.strIndex.union(self.strIndex) if self.dateIndex.dtype == np.object_: @@ -1462,19 +1461,19 @@ def test_tuple_union_bug(self): (2, 'B'), (1, 'C'), (2, 'C')], dtype=[('num', int), ('let', 'a1')]) - idx1 = pandas.Index(aidx1) - idx2 = pandas.Index(aidx2) + idx1 = Index(aidx1) + idx2 = Index(aidx2) - # intersection broken? + # intersection int_idx = idx1.intersection(idx2) + expected = idx1 # pandas.Index(sorted(set(idx1) & set(idx2))) # needs to be 1d like idx1 and idx2 - expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2))) assert int_idx.ndim == 1 tm.assert_index_equal(int_idx, expected) - # union broken + # GH 17376 (union) union_idx = idx1.union(idx2) - expected = idx2 + expected = idx2.sort_values() assert union_idx.ndim == 1 tm.assert_index_equal(union_idx, expected) @@ -1664,13 +1663,19 @@ def test_outer_join_sort(self): left_idx = Index(np.random.permutation(15)) right_idx = tm.makeDateIndex(10) - with tm.assert_produces_warning(RuntimeWarning): + if PY3: + with tm.assert_produces_warning(RuntimeWarning): + joined = left_idx.join(right_idx, how='outer') + else: joined = left_idx.join(right_idx, how='outer') # right_idx in this case because DatetimeIndex has join precedence over # Int64Index - with tm.assert_produces_warning(RuntimeWarning): - expected = right_idx.astype(object).union(left_idx.astype(object)) + if PY3: + with tm.assert_produces_warning(RuntimeWarning): + expected = right_idx.astype(object).union(left_idx) + else: + expected = right_idx.astype(object).union(left_idx) tm.assert_index_equal(joined, expected) def test_nan_first_take_datetime(self): @@ -2059,10 +2064,7 @@ def test_copy_name(self): s1 = Series(2, index=first) s2 = Series(3, index=second[:-1]) - warning_type = RuntimeWarning if PY3 else None - with tm.assert_produces_warning(warning_type): - # Python 3: Unorderable types - s3 = s1 * s2 + s3 = s1 * s2 assert s3.index.name == 'mario' @@ -2095,27 +2097,14 @@ def test_union_base(self): first = idx[3:] second = idx[:5] - if PY3: - with tm.assert_produces_warning(RuntimeWarning): - # unorderable types - result = first.union(second) - expected = Index(['b', 2, 'c', 0, 'a', 1]) - tm.assert_index_equal(result, expected) - else: - result = first.union(second) - expected = Index(['b', 2, 'c', 0, 'a', 1]) - tm.assert_index_equal(result, expected) + expected = Index([0, 1, 2, 'a', 'b', 'c']) + result = first.union(second) + tm.assert_index_equal(result, expected) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if PY3: - with tm.assert_produces_warning(RuntimeWarning): - # unorderable types - result = first.union(case) - assert tm.equalContents(result, idx) - else: result = first.union(case) assert tm.equalContents(result, idx) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index f90fcce973f00..4163ccfa1b31a 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -431,11 +431,7 @@ def test_comparison_label_based(self): assert_series_equal(result, a[a]) for e in [Series(['z'])]: - if compat.PY3: - with tm.assert_produces_warning(RuntimeWarning): - result = a[a | e] - else: - result = a[a | e] + result = a[a | e] assert_series_equal(result, a[a]) # vs scalars @@ -1472,24 +1468,12 @@ def test_operators_bitwise(self): pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2]) # s_0123 will be all false now because of reindexing like s_tft - if compat.PY3: - # unable to sort incompatible object via .union. - exp = Series([False] * 7, index=['b', 'c', 'a', 0, 1, 2, 3]) - with tm.assert_produces_warning(RuntimeWarning): - assert_series_equal(s_tft & s_0123, exp) - else: - exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c']) - assert_series_equal(s_tft & s_0123, exp) + exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c']) + assert_series_equal(s_tft & s_0123, exp) # s_tft will be all false now because of reindexing like s_0123 - if compat.PY3: - # unable to sort incompatible object via .union. - exp = Series([False] * 7, index=[0, 1, 2, 3, 'b', 'c', 'a']) - with tm.assert_produces_warning(RuntimeWarning): - assert_series_equal(s_0123 & s_tft, exp) - else: - exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c']) - assert_series_equal(s_0123 & s_tft, exp) + exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c']) + assert_series_equal(s_0123 & s_tft, exp) assert_series_equal(s_0123 & False, Series([False] * 4)) assert_series_equal(s_0123 ^ False, Series([False, True, True, True])) From 7e70fd7a142ad8409b235b187977a70fe25dcf5d Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 2 Mar 2018 19:32:34 -0500 Subject: [PATCH 2/3] CLN: Fix flake8 errors --- pandas/tests/indexes/test_base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 5f3e43212620a..619fbd2f5a802 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1452,9 +1452,6 @@ def test_drop_tuple(self, values, to_drop): pytest.raises(KeyError, removed.drop, drop_me) def test_tuple_union_bug(self): - import pandas - import numpy as np - aidx1 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], dtype=[('num', int), ('let', 'a1')]) aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), From 899c0408f3313f592817e1124c11f2db3a0f561f Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 6 Mar 2018 21:28:02 -0500 Subject: [PATCH 3/3] [DO NOT MERGE] Is it pytest-xdist? This will probably slow down performance, but I can't see where the bug is in the changes. --- appveyor.yml | 2 +- test.bat | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index ba001208864a8..4269784fee034 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -73,7 +73,7 @@ install: - cmd: conda info -a # create our env - - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist + - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 - cmd: activate pandas - cmd: pip install moto - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run diff --git a/test.bat b/test.bat index e07c84f257a69..358f0feb24a30 100644 --- a/test.bat +++ b/test.bat @@ -1,3 +1,3 @@ :: test on windows -pytest --skip-slow --skip-network pandas -n 2 -r sxX --strict %* +pytest -v --skip-slow --skip-network pandas -r sxX --strict %*