From 6774684d302ae691c507565faec14f92e572b1b4 Mon Sep 17 00:00:00 2001
From: "H. Vetinari"
Date: Fri, 13 Jul 2018 22:19:49 +0200
Subject: [PATCH 1/5] TST/CLN: clean up indexes/multi/test_unique_and_duplicates

---
 pandas/tests/indexes/multi/test_names.py      |   6 +
 .../multi/test_unique_and_duplicates.py       | 146 ++++++++++--------
 2 files changed, 91 insertions(+), 61 deletions(-)

diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py
index a9fbb55679173..d4fb47e81ca16 100644
--- a/pandas/tests/indexes/multi/test_names.py
+++ b/pandas/tests/indexes/multi/test_names.py
@@ -115,3 +115,9 @@ def test_names(idx, index_names):
     ind_names = list(index.names)
     level_names = [level.name for level in index.levels]
     assert ind_names == level_names
+
+
+def test_duplicate_level_names_access_raises(idx):
+    idx.names = ['foo', 'foo']
+    tm.assert_raises_regex(ValueError, 'name foo occurs multiple times',
+                           idx._get_level_number, 'foo')
diff --git a/pandas/tests/indexes/multi/test_unique_and_duplicates.py b/pandas/tests/indexes/multi/test_unique_and_duplicates.py
index c1000e5b6e0f6..774649d3e0552 100644
--- a/pandas/tests/indexes/multi/test_unique_and_duplicates.py
+++ b/pandas/tests/indexes/multi/test_unique_and_duplicates.py
@@ -7,10 +7,24 @@
 import pandas as pd
 import pandas.util.testing as tm
 import pytest
-from pandas import MultiIndex
 from pandas.compat import range, u
 
 
+@pytest.fixture
+def idx_dup():
+    # compare tests/indexes/multi/conftest.py
+    major_axis = pd.Index(['foo', 'bar', 'baz', 'qux'])
+    minor_axis = pd.Index(['one', 'two'])
+
+    major_labels = np.array([0, 0, 1, 0, 1, 1])
+    minor_labels = np.array([0, 1, 0, 1, 0, 1])
+    index_names = ['first', 'second']
+    mi = pd.MultiIndex(levels=[major_axis, minor_axis],
+                       labels=[major_labels, minor_labels],
+                       names=index_names, verify_integrity=False)
+    return mi
+
+
 @pytest.mark.parametrize('names', [None, ['first', 'second']])
 def test_unique(names):
     mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]],
@@ -75,18 +89,28 @@ def test_unique_level(idx, level):
     expected = mi.get_level_values(level)
 
 
+@pytest.mark.parametrize('dropna', [True, False])
+def test_get_unique_index(idx, dropna):
+    mi = idx[[0, 1, 0, 1, 1, 0, 0]]
+    expected = mi._shallow_copy(mi[[0, 1]])
+
+    result = mi._get_unique_index(dropna=dropna)
+    assert result.unique
+    tm.assert_index_equal(result, expected)
+
+
 def test_duplicate_multiindex_labels():
     # GH 17464
     # Make sure that a MultiIndex with duplicate levels throws a ValueError
     with pytest.raises(ValueError):
-        ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)])
+        mi = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)])
 
     # And that using set_levels with duplicate levels fails
-    ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
-                                  [1, 2, 1, 2, 3]])
+    mi = pd.MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
+                                    [1, 2, 1, 2, 3]])
     with pytest.raises(ValueError):
-        ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
-                       inplace=True)
+        mi.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
+                      inplace=True)
 
 
 @pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2],
@@ -109,27 +133,34 @@ def test_duplicate_level_names(names):
 
 def test_duplicate_meta_data():
     # GH 10115
-    index = MultiIndex(
+    mi = pd.MultiIndex(
         levels=[[0, 1], [0, 1, 2]],
         labels=[[0, 0, 0, 0, 1, 1, 1],
                 [0, 1, 2, 0, 0, 1, 2]])
 
-    for idx in [index,
-                index.set_names([None, None]),
-                index.set_names([None, 'Num']),
-                index.set_names(['Upper', 'Num']), ]:
+    for idx in [mi,
+                mi.set_names([None, None]),
+                mi.set_names([None, 'Num']),
+                mi.set_names(['Upper', 'Num']), ]:
         assert idx.has_duplicates
         assert idx.drop_duplicates().names == idx.names
 
 
-def test_duplicates(idx):
+def test_has_duplicates(idx, idx_dup):
+    # see fixtures
+    assert idx.is_unique
     assert not idx.has_duplicates
-    assert idx.append(idx).has_duplicates
+    assert not idx_dup.is_unique
+    assert idx_dup.has_duplicates
+
+    mi = pd.MultiIndex(levels=[[0, 1], [0, 1, 2]],
+                       labels=[[0, 0, 0, 0, 1, 1, 1],
+                               [0, 1, 2, 0, 0, 1, 2]])
+    assert not mi.is_unique
+    assert mi.has_duplicates
 
-    index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[
-        [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]])
-    assert index.has_duplicates
 
+def test_has_duplicates_from_tuples():
     # GH 9075
     t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169),
          (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119),
@@ -150,9 +181,11 @@ def test_duplicates(idx):
         (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123),
         (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)]
 
-    index = pd.MultiIndex.from_tuples(t)
-    assert not index.has_duplicates
+    mi = pd.MultiIndex.from_tuples(t)
+    assert not mi.has_duplicates
 
+
+def test_has_duplicates_overflow():
     # handle int64 overflow if possible
     def check(nlevels, with_nulls):
         labels = np.tile(np.arange(500), 2)
@@ -171,20 +204,20 @@ def check(nlevels, with_nulls):
         levels = [level] * nlevels + [[0, 1]]
 
         # no dups
-        index = MultiIndex(levels=levels, labels=labels)
-        assert not index.has_duplicates
+        mi = pd.MultiIndex(levels=levels, labels=labels)
+        assert not mi.has_duplicates
 
         # with a dup
         if with_nulls:
             def f(a):
                 return np.insert(a, 1000, a[0])
             labels = list(map(f, labels))
-            index = MultiIndex(levels=levels, labels=labels)
+            mi = pd.MultiIndex(levels=levels, labels=labels)
         else:
-            values = index.values.tolist()
-            index = MultiIndex.from_tuples(values + [values[0]])
+            values = mi.values.tolist()
+            mi = pd.MultiIndex.from_tuples(values + [values[0]])
 
-        assert index.has_duplicates
+        assert mi.has_duplicates
 
     # no overflow
     check(4, False)
@@ -194,66 +227,57 @@ def f(a):
     check(8, False)
     check(8, True)
 
-    # GH 9125
-    n, k = 200, 5000
-    levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
-    labels = [np.random.choice(n, k * n) for lev in levels]
-    mi = MultiIndex(levels=levels, labels=labels)
-
-    for keep in ['first', 'last', False]:
-        left = mi.duplicated(keep=keep)
-        right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep)
-        tm.assert_numpy_array_equal(left, right)
 
+def test_get_duplicates():
     # GH5873
     for a in [101, 102]:
-        mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
+        mi = pd.MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
         assert not mi.has_duplicates
 
         with warnings.catch_warnings(record=True):
             # Deprecated - see GH20239
-            assert mi.get_duplicates().equals(MultiIndex.from_arrays(
+            assert mi.get_duplicates().equals(pd.MultiIndex.from_arrays(
                 [[], []]))
 
-        tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
-            2, dtype='bool'))
+        tm.assert_numpy_array_equal(mi.duplicated(),
+                                    np.zeros(2, dtype='bool'))
 
     for n in range(1, 6):  # 1st level shape
         for m in range(1, 5):  # 2nd level shape
             # all possible unique combinations, including nan
             lab = product(range(-1, n), range(-1, m))
-            mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
-                            labels=np.random.permutation(list(lab)).T)
+            mi = pd.MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
+                               labels=np.random.permutation(list(lab)).T)
            assert len(mi) == (n + 1) * (m + 1)
            assert not mi.has_duplicates
            with warnings.catch_warnings(record=True):
                # Deprecated - see GH20239
-            assert mi.get_duplicates().equals(MultiIndex.from_arrays(
+            assert mi.get_duplicates().equals(pd.MultiIndex.from_arrays(
                 [[], []]))
 
-            tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
-                len(mi), dtype='bool'))
-
+            tm.assert_numpy_array_equal(mi.duplicated(),
+                                        np.zeros(len(mi), dtype='bool'))
 
-def test_get_unique_index(idx):
-    idx = idx[[0, 1, 0, 1, 1, 0, 0]]
-    expected = idx._shallow_copy(idx[[0, 1]])
-    for dropna in [False, True]:
-        result = idx._get_unique_index(dropna=dropna)
-        assert result.unique
-        tm.assert_index_equal(result, expected)
+@pytest.mark.parametrize('keep, expected', [
+    ('first', np.array([False, False, False, True, True, False])),
+    ('last', np.array([False, True, True, False, False, False])),
+    (False, np.array([False, True, True, True, True, False]))
+])
+def test_duplicated(idx_dup, keep, expected):
+    result = idx_dup.duplicated(keep=keep)
+    tm.assert_numpy_array_equal(result, expected)
 
-def test_unique_na():
-    idx = pd.Index([2, np.nan, 2, 1], name='my_index')
-    expected = pd.Index([2, np.nan, 1], name='my_index')
-    result = idx.unique()
-    tm.assert_index_equal(result, expected)
-
+@pytest.mark.parametrize('keep', ['first', 'last', False])
+def test_duplicated_large(keep):
+    # GH 9125
+    n, k = 200, 5000
+    levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
+    labels = [np.random.choice(n, k * n) for lev in levels]
+    mi = pd.MultiIndex(levels=levels, labels=labels)
 
-def test_duplicate_level_names_access_raises(idx):
-    idx.names = ['foo', 'foo']
-    tm.assert_raises_regex(ValueError, 'name foo occurs multiple times',
-                           idx._get_level_number, 'foo')
+    result = mi.duplicated(keep=keep)
+    expected = pd._libs.hashtable.duplicated_object(mi.values, keep=keep)
+    tm.assert_numpy_array_equal(result, expected)

From 0ef988f74ee6b518765ac4dd39e65b3fe83da8e1 Mon Sep 17 00:00:00 2001
From: "H. Vetinari"
Date: Sat, 14 Jul 2018 09:08:29 +0200
Subject: [PATCH 2/5] Reorder tests for more legible diff; trigger rerun of failed build

---
 .../multi/test_unique_and_duplicates.py       | 46 +++++++++----------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/pandas/tests/indexes/multi/test_unique_and_duplicates.py b/pandas/tests/indexes/multi/test_unique_and_duplicates.py
index 774649d3e0552..1f644a080d69e 100644
--- a/pandas/tests/indexes/multi/test_unique_and_duplicates.py
+++ b/pandas/tests/indexes/multi/test_unique_and_duplicates.py
@@ -228,6 +228,29 @@ def f(a):
     check(8, True)
 
 
+@pytest.mark.parametrize('keep, expected', [
+    ('first', np.array([False, False, False, True, True, False])),
+    ('last', np.array([False, True, True, False, False, False])),
+    (False, np.array([False, True, True, True, True, False]))
+])
+def test_duplicated(idx_dup, keep, expected):
+    result = idx_dup.duplicated(keep=keep)
+    tm.assert_numpy_array_equal(result, expected)
+
+
+@pytest.mark.parametrize('keep', ['first', 'last', False])
+def test_duplicated_large(keep):
+    # GH 9125
+    n, k = 200, 5000
+    levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
+    labels = [np.random.choice(n, k * n) for lev in levels]
+    mi = pd.MultiIndex(levels=levels, labels=labels)
+
+    result = mi.duplicated(keep=keep)
+    expected = pd._libs.hashtable.duplicated_object(mi.values, keep=keep)
+    tm.assert_numpy_array_equal(result, expected)
+
+
 def test_get_duplicates():
     # GH5873
     for a in [101, 102]:
@@ -258,26 +281,3 @@ def test_get_duplicates():
 
             tm.assert_numpy_array_equal(mi.duplicated(),
                                         np.zeros(len(mi), dtype='bool'))
-
-
-@pytest.mark.parametrize('keep, expected', [
-    ('first', np.array([False, False, False, True, True, False])),
-    ('last', np.array([False, True, True, False, False, False])),
-    (False, np.array([False, True, True, True, True, False]))
-])
-def test_duplicated(idx_dup, keep, expected):
-    result = idx_dup.duplicated(keep=keep)
-    tm.assert_numpy_array_equal(result, expected)
-
-
-@pytest.mark.parametrize('keep', ['first', 'last', False])
-def test_duplicated_large(keep):
-    # GH 9125
-    n, k = 200, 5000
-    levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
-    labels = [np.random.choice(n, k * n) for lev in levels]
-    mi = pd.MultiIndex(levels=levels, labels=labels)
-
-    result = mi.duplicated(keep=keep)
-    expected = pd._libs.hashtable.duplicated_object(mi.values, keep=keep)
-    tm.assert_numpy_array_equal(result, expected)

From 2a3221cd9c8ceb27121ae8a9d9a303ee65bad8c1 Mon Sep 17 00:00:00 2001
From: "H. Vetinari"
Date: Sat, 14 Jul 2018 16:39:23 +0200
Subject: [PATCH 3/5] Incorporate review (jreback)

---
 pandas/tests/indexes/multi/conftest.py        | 26 ++++++++++++++-----
 pandas/tests/indexes/multi/test_names.py      |  1 +
 .../multi/test_unique_and_duplicates.py       | 15 ----------
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py
index 6cf9003500b61..afe651d22c6a7 100644
--- a/pandas/tests/indexes/multi/conftest.py
+++ b/pandas/tests/indexes/multi/conftest.py
@@ -15,13 +15,25 @@ def idx():
     major_labels = np.array([0, 0, 1, 2, 3, 3])
     minor_labels = np.array([0, 1, 0, 1, 0, 1])
     index_names = ['first', 'second']
-    index = MultiIndex(
-        levels=[major_axis, minor_axis],
-        labels=[major_labels, minor_labels],
-        names=index_names,
-        verify_integrity=False
-    )
-    return index
+    mi = MultiIndex(levels=[major_axis, minor_axis],
+                    labels=[major_labels, minor_labels],
+                    names=index_names, verify_integrity=False)
+    return mi
+
+
+@pytest.fixture
+def idx_dup():
+    # compare tests/indexes/multi/conftest.py
+    major_axis = Index(['foo', 'bar', 'baz', 'qux'])
+    minor_axis = Index(['one', 'two'])
+
+    major_labels = np.array([0, 0, 1, 0, 1, 1])
+    minor_labels = np.array([0, 1, 0, 1, 0, 1])
+    index_names = ['first', 'second']
+    mi = MultiIndex(levels=[major_axis, minor_axis],
+                    labels=[major_labels, minor_labels],
+                    names=index_names, verify_integrity=False)
+    return mi
 
 
 @pytest.fixture
diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py
index d4fb47e81ca16..68e8bb0cf58f2 100644
--- a/pandas/tests/indexes/multi/test_names.py
+++ b/pandas/tests/indexes/multi/test_names.py
@@ -118,6 +118,7 @@ def test_names(idx, index_names):
 
 
 def test_duplicate_level_names_access_raises(idx):
+    # GH19029
     idx.names = ['foo', 'foo']
     tm.assert_raises_regex(ValueError, 'name foo occurs multiple times',
                            idx._get_level_number, 'foo')
diff --git a/pandas/tests/indexes/multi/test_unique_and_duplicates.py b/pandas/tests/indexes/multi/test_unique_and_duplicates.py
index 1f644a080d69e..58e550adc3aa0 100644
--- a/pandas/tests/indexes/multi/test_unique_and_duplicates.py
+++ b/pandas/tests/indexes/multi/test_unique_and_duplicates.py
@@ -10,21 +10,6 @@
 from pandas.compat import range, u
 
 
-@pytest.fixture
-def idx_dup():
-    # compare tests/indexes/multi/conftest.py
-    major_axis = pd.Index(['foo', 'bar', 'baz', 'qux'])
-    minor_axis = pd.Index(['one', 'two'])
-
-    major_labels = np.array([0, 0, 1, 0, 1, 1])
-    minor_labels = np.array([0, 1, 0, 1, 0, 1])
-    index_names = ['first', 'second']
-    mi = pd.MultiIndex(levels=[major_axis, minor_axis],
-                       labels=[major_labels, minor_labels],
-                       names=index_names, verify_integrity=False)
-    return mi
-
-
 @pytest.mark.parametrize('names', [None, ['first', 'second']])
 def test_unique(names):
     mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]],

From d93a9a9579a2e967758e1489c839d972c0620012 Mon Sep 17 00:00:00 2001
From: "H. Vetinari"
Date: Sat, 14 Jul 2018 18:12:49 +0200
Subject: [PATCH 4/5] Conform naming scheme for duplicated tests to Series/DF

---
 .../multi/{test_unique_and_duplicates.py => test_duplicates.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pandas/tests/indexes/multi/{test_unique_and_duplicates.py => test_duplicates.py} (100%)

diff --git a/pandas/tests/indexes/multi/test_unique_and_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py
similarity index 100%
rename from pandas/tests/indexes/multi/test_unique_and_duplicates.py
rename to pandas/tests/indexes/multi/test_duplicates.py

From fce0d564b2c2db625cbb97b2c10e7dd21aaae037 Mon Sep 17 00:00:00 2001
From: "H. Vetinari"
Date: Sat, 14 Jul 2018 22:40:45 +0200
Subject: [PATCH 5/5] Further cleanup

---
 pandas/tests/indexes/multi/test_duplicates.py | 98 +++++++++----------
 1 file changed, 48 insertions(+), 50 deletions(-)

diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py
index 58e550adc3aa0..1cdf0ca6e013e 100644
--- a/pandas/tests/indexes/multi/test_duplicates.py
+++ b/pandas/tests/indexes/multi/test_duplicates.py
@@ -2,55 +2,54 @@
 import warnings
 from itertools import product
 
+import pytest
 import numpy as np
-import pandas as pd
-import pandas.util.testing as tm
-import pytest
+
 from pandas.compat import range, u
+from pandas import MultiIndex, DatetimeIndex
+from pandas._libs import hashtable
+import pandas.util.testing as tm
 
 
 @pytest.mark.parametrize('names', [None, ['first', 'second']])
 def test_unique(names):
-    mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]],
-                                   names=names)
+    mi = MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], names=names)
 
     res = mi.unique()
-    exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
+    exp = MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
     tm.assert_index_equal(res, exp)
 
-    mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')],
-                                   names=names)
+    mi = MultiIndex.from_arrays([list('aaaa'), list('abab')],
+                                names=names)
     res = mi.unique()
-    exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')],
-                                    names=mi.names)
+    exp = MultiIndex.from_arrays([list('aa'), list('ab')], names=mi.names)
     tm.assert_index_equal(res, exp)
 
-    mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')],
-                                   names=names)
+    mi = MultiIndex.from_arrays([list('aaaa'), list('aaaa')], names=names)
     res = mi.unique()
-    exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names)
+    exp = MultiIndex.from_arrays([['a'], ['a']], names=mi.names)
     tm.assert_index_equal(res, exp)
 
     # GH #20568 - empty MI
-    mi = pd.MultiIndex.from_arrays([[], []], names=names)
+    mi = MultiIndex.from_arrays([[], []], names=names)
     res = mi.unique()
     tm.assert_index_equal(mi, res)
 
 
 def test_unique_datetimelike():
     idx1 = DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
                           '2015-01-01', 'NaT', 'NaT'])
     idx2 = DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02',
                           '2015-01-02', 'NaT', '2015-01-01'],
                          tz='Asia/Tokyo')
     result = MultiIndex.from_arrays([idx1, idx2]).unique()
 
     eidx1 = DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT'])
     eidx2 = DatetimeIndex(['2015-01-01', '2015-01-02',
                            'NaT', '2015-01-01'],
                           tz='Asia/Tokyo')
     exp = MultiIndex.from_arrays([eidx1, eidx2])
     tm.assert_index_equal(result, exp)
 
@@ -62,14 +61,14 @@ def test_unique_level(idx, level):
     tm.assert_index_equal(result, expected)
 
     # With already unique level
-    mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
-                                   names=['first', 'second'])
+    mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
+                                names=['first', 'second'])
     result = mi.unique(level=level)
     expected = mi.get_level_values(level)
     tm.assert_index_equal(result, expected)
 
     # With empty MI
-    mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second'])
+    mi = MultiIndex.from_arrays([[], []], names=['first', 'second'])
     result = mi.unique(level=level)
     expected = mi.get_level_values(level)
@@ -88,11 +87,11 @@
 def test_duplicate_multiindex_labels():
     # GH 17464
     # Make sure that a MultiIndex with duplicate levels throws a ValueError
     with pytest.raises(ValueError):
-        mi = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)])
+        mi = MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)])
 
     # And that using set_levels with duplicate levels fails
-    mi = pd.MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
-                                    [1, 2, 1, 2, 3]])
+    mi = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
+                                 [1, 2, 1, 2, 3]])
     with pytest.raises(ValueError):
         mi.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
                       inplace=True)
@@ -102,11 +101,11 @@
 def test_duplicate_level_names(names):
     # GH18872, GH19029
-    mi = pd.MultiIndex.from_product([[0, 1]] * 3, names=names)
+    mi = MultiIndex.from_product([[0, 1]] * 3, names=names)
     assert mi.names == names
 
     # With .rename()
-    mi = pd.MultiIndex.from_product([[0, 1]] * 3)
+    mi = MultiIndex.from_product([[0, 1]] * 3)
     mi = mi.rename(names)
     assert mi.names == names
@@ -118,7 +117,7 @@
 def test_duplicate_meta_data():
     # GH 10115
-    mi = pd.MultiIndex(
+    mi = MultiIndex(
         levels=[[0, 1], [0, 1, 2]],
         labels=[[0, 0, 0, 0, 1, 1, 1],
                 [0, 1, 2, 0, 0, 1, 2]])
@@ -138,9 +137,9 @@ def test_has_duplicates(idx, idx_dup):
     assert not idx_dup.is_unique
     assert idx_dup.has_duplicates
 
-    mi = pd.MultiIndex(levels=[[0, 1], [0, 1, 2]],
-                       labels=[[0, 0, 0, 0, 1, 1, 1],
-                               [0, 1, 2, 0, 0, 1, 2]])
+    mi = MultiIndex(levels=[[0, 1], [0, 1, 2]],
+                    labels=[[0, 0, 0, 0, 1, 1, 1],
+                            [0, 1, 2, 0, 0, 1, 2]])
     assert not mi.is_unique
     assert mi.has_duplicates
@@ -166,7 +165,7 @@
         (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123),
         (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)]
 
-    mi = pd.MultiIndex.from_tuples(t)
+    mi = MultiIndex.from_tuples(t)
     assert not mi.has_duplicates
@@ -189,7 +188,7 @@ def check(nlevels, with_nulls):
         levels = [level] * nlevels + [[0, 1]]
 
         # no dups
-        mi = pd.MultiIndex(levels=levels, labels=labels)
+        mi = MultiIndex(levels=levels, labels=labels)
         assert not mi.has_duplicates
@@ -197,10 +196,10 @@
         # with a dup
         if with_nulls:
             def f(a):
                 return np.insert(a, 1000, a[0])
             labels = list(map(f, labels))
-            mi = pd.MultiIndex(levels=levels, labels=labels)
+            mi = MultiIndex(levels=levels, labels=labels)
         else:
             values = mi.values.tolist()
-            mi = pd.MultiIndex.from_tuples(values + [values[0]])
+            mi = MultiIndex.from_tuples(values + [values[0]])
 
         assert mi.has_duplicates
@@ -229,23 +228,22 @@ def test_duplicated_large(keep):
     n, k = 200, 5000
     levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
     labels = [np.random.choice(n, k * n) for lev in levels]
-    mi = pd.MultiIndex(levels=levels, labels=labels)
+    mi = MultiIndex(levels=levels, labels=labels)
 
     result = mi.duplicated(keep=keep)
-    expected = pd._libs.hashtable.duplicated_object(mi.values, keep=keep)
+    expected = hashtable.duplicated_object(mi.values, keep=keep)
     tm.assert_numpy_array_equal(result, expected)
 
 
 def test_get_duplicates():
     # GH5873
     for a in [101, 102]:
-        mi = pd.MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
+        mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
         assert not mi.has_duplicates
 
         with warnings.catch_warnings(record=True):
             # Deprecated - see GH20239
-            assert mi.get_duplicates().equals(pd.MultiIndex.from_arrays(
-                [[], []]))
+            assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []]))
 
         tm.assert_numpy_array_equal(mi.duplicated(),
                                     np.zeros(2, dtype='bool'))
@@ -254,14 +252,14 @@ def test_get_duplicates():
         for m in range(1, 5):  # 2nd level shape
             # all possible unique combinations, including nan
             lab = product(range(-1, n), range(-1, m))
-            mi = pd.MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
-                               labels=np.random.permutation(list(lab)).T)
+            mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
+                            labels=np.random.permutation(list(lab)).T)
             assert len(mi) == (n + 1) * (m + 1)
             assert not mi.has_duplicates
 
             with warnings.catch_warnings(record=True):
                 # Deprecated - see GH20239
-                assert mi.get_duplicates().equals(pd.MultiIndex.from_arrays(
+                assert mi.get_duplicates().equals(MultiIndex.from_arrays(
                     [[], []]))
 
             tm.assert_numpy_array_equal(mi.duplicated(),
                                         np.zeros(len(mi), dtype='bool'))
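
---

Note (not part of the patch series): the expected arrays in the parametrized test_duplicated cases encode the keep semantics of MultiIndex.duplicated for the idx_dup fixture. A minimal sketch of the same index and results, building it via from_tuples instead of the fixture's levels/labels construction:

    import pandas as pd

    # Same six entries as the idx_dup fixture; positions 3 and 4 repeat
    # the tuples first seen at positions 1 and 2.
    mi = pd.MultiIndex.from_tuples(
        [('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
         ('foo', 'two'), ('bar', 'one'), ('bar', 'two')],
        names=['first', 'second'])

    mi.duplicated(keep='first')  # mark all but the first occurrence
    # array([False, False, False,  True,  True, False])
    mi.duplicated(keep='last')   # mark all but the last occurrence
    # array([False,  True,  True, False, False, False])
    mi.duplicated(keep=False)    # mark every occurrence of a duplicate
    # array([False,  True,  True,  True,  True, False])

These three outputs are exactly the expected arrays in the test, which is why the parametrization can share the single idx_dup fixture.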