diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 072356e4923a6..4cc0504417801 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -1,4 +1,11 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pandas as pd +import pandas.util.testing as tm import pytest +from pandas import Index, MultiIndex, date_range, period_range +from pandas.compat import lrange def test_shift(idx): @@ -6,3 +13,316 @@ def test_shift(idx): # GH8083 test the base class for shift pytest.raises(NotImplementedError, idx.shift, 1) pytest.raises(NotImplementedError, idx.shift, 1, 2) + + +def test_bounds(idx): + idx._bounds + + +def test_groupby(idx): + groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2])) + labels = idx.get_values().tolist() + exp = {1: labels[:3], 2: labels[3:]} + tm.assert_dict_equal(groups, exp) + + # GH5620 + groups = idx.groupby(idx) + exp = {key: [key] for key in idx} + tm.assert_dict_equal(groups, exp) + + +def test_truncate(): + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + + result = index.truncate(before=1) + assert 'foo' not in result.levels[0] + assert 1 in result.levels[0] + + result = index.truncate(after=1) + assert 2 not in result.levels[0] + assert 1 in result.levels[0] + + result = index.truncate(before=1, after=2) + assert len(result.levels[0]) == 2 + + # after < before + pytest.raises(ValueError, index.truncate, 3, 1) + + +def test_where(): + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + + def f(): + i.where(True) + + pytest.raises(NotImplementedError, f) + + +def test_where_array_like(): + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + klasses = [list, tuple, np.array, pd.Series] + cond = [False, True] + + for klass in klasses: + def f(): + return i.where(klass(cond)) + pytest.raises(NotImplementedError, f) + +# TODO: reshape + + +def test_reorder_levels(idx): + # this blows up + tm.assert_raises_regex(IndexError, '^Too many levels', + idx.reorder_levels, [2, 1, 0]) + + +def test_numpy_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(['foo', 'bar']) + + m = MultiIndex.from_product([ + numbers, names], names=names) + expected = MultiIndex.from_product([ + numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(np.repeat(m, reps), expected) + + msg = "the 'axis' parameter is not supported" + tm.assert_raises_regex( + ValueError, msg, np.repeat, m, reps, axis=1) + + +def test_append_mixed_dtypes(): + # GH 13660 + dti = date_range('2011-01-01', freq='M', periods=3, ) + dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern') + pi = period_range('2011-01', freq='M', periods=3) + + mi = MultiIndex.from_arrays([[1, 2, 3], + [1.1, np.nan, 3.3], + ['a', 'b', 'c'], + dti, dti_tz, pi]) + assert mi.nlevels == 6 + + res = mi.append(mi) + exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], + [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], + ['a', 'b', 'c', 'a', 'b', 'c'], + dti.append(dti), + dti_tz.append(dti_tz), + pi.append(pi)]) + tm.assert_index_equal(res, exp) + + other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'], + ['x', 'y', 'z'], ['x', 'y', 'z'], + ['x', 'y', 'z'], ['x', 'y', 'z']]) + + res = mi.append(other) + exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'], + [1.1, np.nan, 3.3, 'x', 'y', 'z'], + ['a', 'b', 'c', 'x', 'y', 'z'], + dti.append(pd.Index(['x', 'y', 'z'])), + dti_tz.append(pd.Index(['x', 'y', 'z'])), + pi.append(pd.Index(['x', 'y', 'z']))]) + tm.assert_index_equal(res, exp) + + +def test_take(idx): + indexer = [4, 3, 0, 2] + result = idx.take(indexer) + expected = idx[indexer] + assert result.equals(expected) + + # TODO: Remove Commented Code + # if not isinstance(idx, + # (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + # GH 10791 + with pytest.raises(AttributeError): + idx.freq + + +def test_take_invalid_kwargs(idx): + idx = idx + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') + + +def test_take_fill_value(): + # GH 12631 + vals = [['A', 'B'], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]] + idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) + + result = idx.take(np.array([1, 0, -1])) + exp_vals = [('A', pd.Timestamp('2011-01-02')), + ('A', pd.Timestamp('2011-01-01')), + ('B', pd.Timestamp('2011-01-02'))] + expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + exp_vals = [('A', pd.Timestamp('2011-01-02')), + ('A', pd.Timestamp('2011-01-01')), + (np.nan, pd.NaT)] + expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + exp_vals = [('A', pd.Timestamp('2011-01-02')), + ('A', pd.Timestamp('2011-01-01')), + ('B', pd.Timestamp('2011-01-02'))] + expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + +def test_iter(idx): + result = list(idx) + expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] + assert result == expected + + +def test_sub(idx): + + first = idx + + # - now raises (previously was set op difference) + with pytest.raises(TypeError): + first - idx[-3:] + with pytest.raises(TypeError): + idx[-3:] - first + with pytest.raises(TypeError): + idx[-3:] - first.tolist() + with pytest.raises(TypeError): + first.tolist() - idx[-3:] + + +def test_map(idx): + # callable + index = idx + + # we don't infer UInt64 + if isinstance(index, pd.UInt64Index): + expected = index.astype('int64') + else: + expected = index + + result = index.map(lambda x: x) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "mapper", + [ + lambda values, idx: {i: e for e, i in zip(values, idx)}, + lambda values, idx: pd.Series(values, idx)]) +def test_map_dictlike(idx, mapper): + + if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip("skipping tests for {}".format(type(idx))) + + identity = mapper(idx.values, idx) + + # we don't infer to UInt64 for a dict + if isinstance(idx, pd.UInt64Index) and isinstance(identity, dict): + expected = idx.astype('int64') + else: + expected = idx + + result = idx.map(identity) + tm.assert_index_equal(result, expected) + + # empty mappable + expected = pd.Index([np.nan] * len(idx)) + result = idx.map(mapper(expected, idx)) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize('func', [ + np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, + np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, + np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, + np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, + np.rad2deg +]) +def test_numpy_ufuncs(func): + # test ufuncs of numpy 1.9.2. see: + # http://docs.scipy.org/doc/numpy/reference/ufuncs.html + + # some functions are skipped because it may return different result + # for unicode input depending on numpy version + + # copy and paste from idx fixture as pytest doesn't support + # parameters and fixtures at the same time. + major_axis = Index(['foo', 'bar', 'baz', 'qux']) + minor_axis = Index(['one', 'two']) + major_labels = np.array([0, 0, 1, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + index_names = ['first', 'second'] + + idx = MultiIndex( + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=index_names, + verify_integrity=False + ) + + with pytest.raises(Exception): + with np.errstate(all='ignore'): + func(idx) + + +@pytest.mark.parametrize('func', [ + np.isfinite, np.isinf, np.isnan, np.signbit +]) +def test_numpy_type_funcs(func): + # for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: + # copy and paste from idx fixture as pytest doesn't support + # parameters and fixtures at the same time. + major_axis = Index(['foo', 'bar', 'baz', 'qux']) + minor_axis = Index(['one', 'two']) + major_labels = np.array([0, 0, 1, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + index_names = ['first', 'second'] + + idx = MultiIndex( + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=index_names, + verify_integrity=False + ) + + with pytest.raises(Exception): + func(idx) diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py new file mode 100644 index 0000000000000..e0e23609290e5 --- /dev/null +++ b/pandas/tests/indexes/multi/test_astype.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pandas.util.testing as tm +import pytest +from pandas.util.testing import assert_copy +from pandas.core.dtypes.dtypes import CategoricalDtype + + +def test_astype(idx): + expected = idx.copy() + actual = idx.astype('O') + assert_copy(actual.levels, expected.levels) + assert_copy(actual.labels, expected.labels) + assert [level.name for level in actual.levels] == list(expected.names) + + with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): + idx.astype(np.dtype(int)) + + +@pytest.mark.parametrize('ordered', [True, False]) +def test_astype_category(idx, ordered): + # GH 18630 + msg = '> 1 ndim Categorical are not supported at this time' + with tm.assert_raises_regex(NotImplementedError, msg): + idx.astype(CategoricalDtype(ordered=ordered)) + + if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + with tm.assert_raises_regex(NotImplementedError, msg): + idx.astype('category') diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 9577662bda366..4b8d0553886b2 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -234,29 +234,30 @@ def test_from_arrays_empty(): tm.assert_index_equal(result, expected) -def test_from_arrays_invalid_input(): +@pytest.mark.parametrize('invalid_array', [ + (1), + ([1]), + ([1, 2]), + ([[1], 2]), + ('a'), + (['a']), + (['a', 'b']), + ([['a'], 'b']), +]) +def test_from_arrays_invalid_input(invalid_array): invalid_inputs = [1, [1], [1, 2], [[1], 2], 'a', ['a'], ['a', 'b'], [['a'], 'b']] for i in invalid_inputs: pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i) -def test_from_arrays_different_lengths(): +@pytest.mark.parametrize('idx1, idx2', [ + ([1, 2, 3], ['a', 'b']), + ([], ['a', 'b']), + ([1, 2, 3], []) +]) +def test_from_arrays_different_lengths(idx1, idx2): # see gh-13599 - idx1 = [1, 2, 3] - idx2 = ['a', 'b'] - tm.assert_raises_regex(ValueError, '^all arrays must ' - 'be same length$', - MultiIndex.from_arrays, [idx1, idx2]) - - idx1 = [] - idx2 = ['a', 'b'] - tm.assert_raises_regex(ValueError, '^all arrays must ' - 'be same length$', - MultiIndex.from_arrays, [idx1, idx2]) - - idx1 = [1, 2, 3] - idx2 = [] tm.assert_raises_regex(ValueError, '^all arrays must ' 'be same length$', MultiIndex.from_arrays, [idx1, idx2]) @@ -305,66 +306,87 @@ def test_from_tuples_index_values(idx): assert (result.values == idx.values).all() -def test_from_product_empty(): +def test_from_product_empty_zero_levels(): # 0 levels with tm.assert_raises_regex( ValueError, "Must pass non-zero number of levels/labels"): MultiIndex.from_product([]) - # 1 level + +def test_from_product_empty_one_level(): result = MultiIndex.from_product([[]], names=['A']) expected = pd.Index([], name='A') tm.assert_index_equal(result.levels[0], expected) - # 2 levels - l1 = [[], ['foo', 'bar', 'baz'], []] - l2 = [[], [], ['a', 'b', 'c']] + +@pytest.mark.parametrize('first, second', [ + ([], []), + (['foo', 'bar', 'baz'], []), + ([], ['a', 'b', 'c']), +]) +def test_from_product_empty_two_levels(first, second): names = ['A', 'B'] - for first, second in zip(l1, l2): - result = MultiIndex.from_product([first, second], names=names) - expected = MultiIndex(levels=[first, second], - labels=[[], []], names=names) - tm.assert_index_equal(result, expected) + result = MultiIndex.from_product([first, second], names=names) + expected = MultiIndex(levels=[first, second], + labels=[[], []], names=names) + tm.assert_index_equal(result, expected) + +@pytest.mark.parametrize('N', list(range(4))) +def test_from_product_empty_three_levels(N): # GH12258 names = ['A', 'B', 'C'] - for N in range(4): - lvl2 = lrange(N) - result = MultiIndex.from_product([[], lvl2, []], names=names) - expected = MultiIndex(levels=[[], lvl2, []], - labels=[[], [], []], names=names) - tm.assert_index_equal(result, expected) + lvl2 = lrange(N) + result = MultiIndex.from_product([[], lvl2, []], names=names) + expected = MultiIndex(levels=[[], lvl2, []], + labels=[[], [], []], names=names) + tm.assert_index_equal(result, expected) -def test_from_product_invalid_input(): - invalid_inputs = [1, [1], [1, 2], [[1], 2], - 'a', ['a'], ['a', 'b'], [['a'], 'b']] - for i in invalid_inputs: - pytest.raises(TypeError, MultiIndex.from_product, iterables=i) +@pytest.mark.parametrize('invalid_input', [ + 1, + [1], + [1, 2], + [[1], 2], + 'a', + ['a'], + ['a', 'b'], + [['a'], 'b'], +]) +def test_from_product_invalid_input(invalid_input): + pytest.raises(TypeError, MultiIndex.from_product, iterables=invalid_input) def test_from_product_datetimeindex(): dt_index = date_range('2000-01-01', periods=2) mi = pd.MultiIndex.from_product([[1, 2], dt_index]) - etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp( - '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( - '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) + etalon = construct_1d_object_array_from_listlike([ + (1, pd.Timestamp('2000-01-01')), + (1, pd.Timestamp('2000-01-02')), + (2, pd.Timestamp('2000-01-01')), + (2, pd.Timestamp('2000-01-02')), + ]) tm.assert_numpy_array_equal(mi.values, etalon) -def test_from_product_index_series_categorical(): +@pytest.mark.parametrize('ordered', [False, True]) +@pytest.mark.parametrize('f', [ + lambda x: x, + lambda x: pd.Series(x), + lambda x: x.values +]) +def test_from_product_index_series_categorical(ordered, f): # GH13743 first = ['foo', 'bar'] - for ordered in [False, True]: - idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), - ordered=ordered) - expected = pd.CategoricalIndex(list("abcaab") + list("abcaab"), - categories=list("bac"), - ordered=ordered) - for arr in [idx, pd.Series(idx), idx.values]: - result = pd.MultiIndex.from_product([first, arr]) - tm.assert_index_equal(result.get_level_values(1), expected) + idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), + ordered=ordered) + expected = pd.CategoricalIndex(list("abcaab") + list("abcaab"), + categories=list("bac"), + ordered=ordered) + + result = pd.MultiIndex.from_product([first, f(idx)]) + tm.assert_index_equal(result.get_level_values(1), expected) def test_from_product(): @@ -409,19 +431,28 @@ def test_create_index_existing_name(idx): index = idx index.names = ['foo', 'bar'] result = pd.Index(index) - tm.assert_index_equal( - result, Index(Index([('foo', 'one'), ('foo', 'two'), - ('bar', 'one'), ('baz', 'two'), - ('qux', 'one'), ('qux', 'two')], - dtype='object'), - names=['foo', 'bar'])) + expected = Index( + Index([ + ('foo', 'one'), ('foo', 'two'), + ('bar', 'one'), ('baz', 'two'), + ('qux', 'one'), ('qux', 'two')], + dtype='object' + ), + names=['foo', 'bar'] + ) + tm.assert_index_equal(result, expected) result = pd.Index(index, names=['A', 'B']) - tm.assert_index_equal( - result, - Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), - ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], - dtype='object'), names=['A', 'B'])) + expected = Index( + Index([ + ('foo', 'one'), ('foo', 'two'), + ('bar', 'one'), ('baz', 'two'), + ('qux', 'one'), ('qux', 'two')], + dtype='object' + ), + names=['A', 'B'] + ) + tm.assert_index_equal(result, expected) def test_tuples_with_name_string(): diff --git a/pandas/tests/indexes/multi/test_contains.py b/pandas/tests/indexes/multi/test_contains.py index aaed4467816da..7b91a1d14d7e8 100644 --- a/pandas/tests/indexes/multi/test_contains.py +++ b/pandas/tests/indexes/multi/test_contains.py @@ -43,8 +43,10 @@ def test_isin_nan_pypy(): def test_isin(): values = [('foo', 2), ('bar', 3), ('quux', 4)] - idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( - 4)]) + idx = MultiIndex.from_arrays([ + ['qux', 'baz', 'foo', 'bar'], + np.arange(4) + ]) result = idx.isin(values) expected = np.array([False, False, True, True]) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 282f2fa84efe0..f6c5c0c5eb346 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -3,8 +3,8 @@ from copy import copy, deepcopy import pandas.util.testing as tm -from pandas import (CategoricalIndex, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, compat) +import pytest +from pandas import MultiIndex def assert_multiindex_copied(copy, original): @@ -41,84 +41,46 @@ def test_view(idx): assert_multiindex_copied(i_view, idx) -def test_copy_name(idx): - # gh-12309: Check that the "name" argument - # passed at initialization is honored. - - # TODO: Remove or refactor MultiIndex not tested. - for name, index in compat.iteritems({'idx': idx}): - if isinstance(index, MultiIndex): - continue - - first = index.__class__(index, copy=True, name='mario') - second = first.__class__(first, copy=False) - - # Even though "copy=False", we want a new object. - assert first is not second - - # Not using tm.assert_index_equal() since names differ. - assert index.equals(first) - - assert first.name == 'mario' - assert second.name == 'mario' - - s1 = Series(2, index=first) - s2 = Series(3, index=second[:-1]) - - if not isinstance(index, CategoricalIndex): - # See gh-13365 - s3 = s1 * s2 - assert s3.index.name == 'mario' - - -def test_ensure_copied_data(idx): - # Check the "copy" argument of each Index.__new__ is honoured - # GH12309 - # TODO: REMOVE THIS TEST. MultiIndex is tested seperately as noted below. - - for name, index in compat.iteritems({'idx': idx}): - init_kwargs = {} - if isinstance(index, PeriodIndex): - # Needs "freq" specification: - init_kwargs['freq'] = index.freq - elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)): - # RangeIndex cannot be initialized from data - # MultiIndex and CategoricalIndex are tested separately - continue - - index_type = index.__class__ - result = index_type(index.values, copy=True, **init_kwargs) - tm.assert_index_equal(index, result) - tm.assert_numpy_array_equal(index.values, result.values, - check_same='copy') - - if isinstance(index, PeriodIndex): - # .values an object array of Period, thus copied - result = index_type(ordinal=index.asi8, copy=False, - **init_kwargs) - tm.assert_numpy_array_equal(index._ndarray_values, - result._ndarray_values, - check_same='same') - elif isinstance(index, IntervalIndex): - # checked in test_interval.py - pass - else: - result = index_type(index.values, copy=False, **init_kwargs) - tm.assert_numpy_array_equal(index.values, result.values, - check_same='same') - tm.assert_numpy_array_equal(index._ndarray_values, - result._ndarray_values, - check_same='same') - - -def test_copy_and_deepcopy(indices): - - if isinstance(indices, MultiIndex): - return - for func in (copy, deepcopy): - idx_copy = func(indices) - assert idx_copy is not indices - assert idx_copy.equals(indices) - - new_copy = indices.copy(deep=True, name="banana") - assert new_copy.name == "banana" +@pytest.mark.parametrize('func', [copy, deepcopy]) +def test_copy_and_deepcopy(func): + + idx = MultiIndex( + levels=[['foo', 'bar'], ['fizz', 'buzz']], + labels=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=['first', 'second'] + ) + idx_copy = func(idx) + assert idx_copy is not idx + assert idx_copy.equals(idx) + + +@pytest.mark.parametrize('deep', [True, False]) +def test_copy_method(deep): + idx = MultiIndex( + levels=[['foo', 'bar'], ['fizz', 'buzz']], + labels=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=['first', 'second'] + ) + idx_copy = idx.copy(deep=deep) + assert idx_copy.equals(idx) + + +@pytest.mark.parametrize('deep', [True, False]) +@pytest.mark.parametrize('kwarg, value', [ + ('names', ['thrid', 'fourth']), + ('levels', [['foo2', 'bar2'], ['fizz2', 'buzz2']]), + ('labels', [[1, 0, 0, 0], [1, 1, 0, 0]]) +]) +def test_copy_method_kwargs(deep, kwarg, value): + # gh-12309: Check that the "name" argument as well other kwargs are honored + idx = MultiIndex( + levels=[['foo', 'bar'], ['fizz', 'buzz']], + labels=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=['first', 'second'] + ) + + idx_copy = idx.copy(**{kwarg: value, 'deep': deep}) + if kwarg == 'names': + assert getattr(idx_copy, kwarg) == value + else: + assert list(list(i) for i in getattr(idx_copy, kwarg)) == value diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index 0bebe3165e2e8..7770ee96bbfb3 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -4,29 +4,25 @@ import numpy as np import pandas as pd import pandas.util.testing as tm -from pandas import Index, MultiIndex, RangeIndex, Series, compat +from pandas import Index, MultiIndex, Series from pandas.compat import lrange, lzip, range def test_equals(idx): - # TODO: Remove or Refactor. MultiIndex not tested. - for name, idx in compat.iteritems({'idx': idx}): - assert idx.equals(idx) - assert idx.equals(idx.copy()) - assert idx.equals(idx.astype(object)) - - assert not idx.equals(list(idx)) - assert not idx.equals(np.array(idx)) - - # Cannot pass in non-int64 dtype to RangeIndex - if not isinstance(idx, RangeIndex): - same_values = Index(idx, dtype=object) - assert idx.equals(same_values) - assert same_values.equals(idx) - - if idx.nlevels == 1: - # do not test MultiIndex - assert not idx.equals(pd.Series(idx)) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + + assert not idx.equals(list(idx)) + assert not idx.equals(np.array(idx)) + + same_values = Index(idx, dtype=object) + assert idx.equals(same_values) + assert same_values.equals(idx) + + if idx.nlevels == 1: + # do not test MultiIndex + assert not idx.equals(pd.Series(idx)) def test_equals_op(idx): diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 21e8a199cadd9..63936a74b6b8c 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -100,11 +100,6 @@ def test_repr_roundtrip(): tm.assert_index_equal(result, mi_u, exact=True) -def test_str(): - # tested elsewhere - pass - - def test_unicode_string_with_unicode(): d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 56fd4c04cb96e..30be5b546f7c7 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -9,6 +9,16 @@ from pandas.compat import range +def assert_matching(actual, expected, check_dtype=False): + # avoid specifying internal representation + # as much as possible + assert len(actual) == len(expected) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp) + tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) + + def test_get_level_number_integer(idx): idx.names = [1, 0] assert idx._get_level_number(1) == 0 @@ -164,15 +174,6 @@ def test_set_levels(idx): levels = idx.levels new_levels = [[lev + 'a' for lev in level] for level in levels] - def assert_matching(actual, expected, check_dtype=False): - # avoid specifying internal representation - # as much as possible - assert len(actual) == len(expected) - for act, exp in zip(actual, expected): - act = np.asarray(act) - exp = np.asarray(exp) - tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) - # level changing [w/o mutation] ind2 = idx.set_levels(new_levels) assert_matching(ind2.levels, new_levels) @@ -254,15 +255,6 @@ def test_set_labels(idx): minor_labels = [(x + 1) % 1 for x in minor_labels] new_labels = [major_labels, minor_labels] - def assert_matching(actual, expected): - # avoid specifying internal representation - # as much as possible - assert len(actual) == len(expected) - for act, exp in zip(actual, expected): - act = np.asarray(act) - exp = np.asarray(exp, dtype=np.int8) - tm.assert_numpy_array_equal(act, exp) - # label changing [w/o mutation] ind2 = idx.set_labels(new_labels) assert_matching(ind2.labels, new_labels) @@ -389,21 +381,22 @@ def test_set_names_with_nlevel_1(inplace): tm.assert_index_equal(result, expected) -def test_set_levels_categorical(): +@pytest.mark.parametrize('ordered', [True, False]) +def test_set_levels_categorical(ordered): # GH13854 index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]]) - for ordered in [False, True]: - cidx = CategoricalIndex(list("bac"), ordered=ordered) - result = index.set_levels(cidx, 0) - expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], - labels=index.labels) - tm.assert_index_equal(result, expected) - - result_lvl = result.get_level_values(0) - expected_lvl = CategoricalIndex(list("bacb"), - categories=cidx.categories, - ordered=cidx.ordered) - tm.assert_index_equal(result_lvl, expected_lvl) + + cidx = CategoricalIndex(list("bac"), ordered=ordered) + result = index.set_levels(cidx, 0) + expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], + labels=index.labels) + tm.assert_index_equal(result, expected) + + result_lvl = result.get_level_values(0) + expected_lvl = CategoricalIndex(list("bacb"), + categories=cidx.categories, + ordered=cidx.ordered) + tm.assert_index_equal(result_lvl, expected_lvl) def test_set_value_keeps_names(): diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 0b528541e5eb6..ebd50909bae98 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -109,31 +109,6 @@ def test_slice_locs_not_contained(): assert result == (0, len(index)) -def test_insert_base(idx): - - result = idx[1:4] - - # test 0th element - assert idx[0:4].equals(result.insert(0, idx[0])) - - -def test_delete_base(idx): - - expected = idx[1:] - result = idx.delete(0) - assert result.equals(expected) - assert result.name == expected.name - - expected = idx[:-1] - result = idx.delete(-1) - assert result.equals(expected) - assert result.name == expected.name - - with pytest.raises((IndexError, ValueError)): - # either depending on numpy version - result = idx.delete(len(idx)) - - def test_putmask_with_wrong_mask(idx): # GH18368 diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 4a386c6e8dbe4..ac3958956bae7 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -8,10 +8,11 @@ from pandas import Index, MultiIndex -@pytest.mark.parametrize('other', - [Index(['three', 'one', 'two']), - Index(['one']), - Index(['one', 'three'])]) +@pytest.mark.parametrize('other', [ + Index(['three', 'one', 'two']), + Index(['one']), + Index(['one', 'three']), +]) def test_join_level(idx, other, join_type): join_index, lidx, ridx = other.join(idx, how=join_type, level='second', diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index 01465ea4c2f3b..79fcff965e725 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -4,7 +4,7 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index, isna +from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index from pandas._libs.tslib import iNaT from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -78,27 +78,9 @@ def test_nulls(idx): # this is really a smoke test for the methods # as these are adequately tested for function elsewhere - # TODO: Remove or Refactor. MultiIndex not Implemeted. - for name, index in [('idx', idx), ]: - if len(index) == 0: - tm.assert_numpy_array_equal( - index.isna(), np.array([], dtype=bool)) - elif isinstance(index, MultiIndex): - idx = index.copy() - msg = "isna is not defined for MultiIndex" - with tm.assert_raises_regex(NotImplementedError, msg): - idx.isna() - else: - - if not index.hasnans: - tm.assert_numpy_array_equal( - index.isna(), np.zeros(len(index), dtype=bool)) - tm.assert_numpy_array_equal( - index.notna(), np.ones(len(index), dtype=bool)) - else: - result = isna(index) - tm.assert_numpy_array_equal(index.isna(), result) - tm.assert_numpy_array_equal(index.notna(), ~result) + msg = "isna is not defined for MultiIndex" + with tm.assert_raises_regex(NotImplementedError, msg): + idx.isna() @pytest.mark.xfail diff --git a/pandas/tests/indexes/multi/test_operations.py b/pandas/tests/indexes/multi/test_operations.py deleted file mode 100644 index d38cb28039595..0000000000000 --- a/pandas/tests/indexes/multi/test_operations.py +++ /dev/null @@ -1,448 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np -import pandas as pd -import pandas.util.testing as tm -import pytest -from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, MultiIndex, - PeriodIndex, TimedeltaIndex, UInt64Index, date_range, - period_range) -from pandas.compat import lrange, range -from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin -from pandas.util.testing import assert_copy - - -def check_level_names(index, names): - assert [level.name for level in index.levels] == list(names) - - -def test_insert(idx): - # key contained in all levels - new_index = idx.insert(0, ('bar', 'two')) - assert new_index.equal_levels(idx) - assert new_index[0] == ('bar', 'two') - - # key not contained in all levels - new_index = idx.insert(0, ('abc', 'three')) - - exp0 = Index(list(idx.levels[0]) + ['abc'], name='first') - tm.assert_index_equal(new_index.levels[0], exp0) - - exp1 = Index(list(idx.levels[1]) + ['three'], name='second') - tm.assert_index_equal(new_index.levels[1], exp1) - assert new_index[0] == ('abc', 'three') - - # key wrong length - msg = "Item must have length equal to number of levels" - with tm.assert_raises_regex(ValueError, msg): - idx.insert(0, ('foo2',)) - - left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], - columns=['1st', '2nd', '3rd']) - left.set_index(['1st', '2nd'], inplace=True) - ts = left['3rd'].copy(deep=True) - - left.loc[('b', 'x'), '3rd'] = 2 - left.loc[('b', 'a'), '3rd'] = -1 - left.loc[('b', 'b'), '3rd'] = 3 - left.loc[('a', 'x'), '3rd'] = 4 - left.loc[('a', 'w'), '3rd'] = 5 - left.loc[('a', 'a'), '3rd'] = 6 - - ts.loc[('b', 'x')] = 2 - ts.loc['b', 'a'] = -1 - ts.loc[('b', 'b')] = 3 - ts.loc['a', 'x'] = 4 - ts.loc[('a', 'w')] = 5 - ts.loc['a', 'a'] = 6 - - right = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1], ['b', 'x', 2], - ['b', 'a', -1], ['b', 'b', 3], ['a', 'x', 4], - ['a', 'w', 5], ['a', 'a', 6]], - columns=['1st', '2nd', '3rd']) - right.set_index(['1st', '2nd'], inplace=True) - # FIXME data types changes to float because - # of intermediate nan insertion; - tm.assert_frame_equal(left, right, check_dtype=False) - tm.assert_series_equal(ts, right['3rd']) - - # GH9250 - idx = [('test1', i) for i in range(5)] + \ - [('test2', i) for i in range(6)] + \ - [('test', 17), ('test', 18)] - - left = pd.Series(np.linspace(0, 10, 11), - pd.MultiIndex.from_tuples(idx[:-2])) - - left.loc[('test', 17)] = 11 - left.loc[('test', 18)] = 12 - - right = pd.Series(np.linspace(0, 12, 13), - pd.MultiIndex.from_tuples(idx)) - - tm.assert_series_equal(left, right) - - -def test_bounds(idx): - idx._bounds - - -def test_append(idx): - result = idx[:3].append(idx[3:]) - assert result.equals(idx) - - foos = [idx[:1], idx[1:3], idx[3:]] - result = foos[0].append(foos[1:]) - assert result.equals(idx) - - # empty - result = idx.append([]) - assert result.equals(idx) - - -def test_groupby(idx): - groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2])) - labels = idx.get_values().tolist() - exp = {1: labels[:3], 2: labels[3:]} - tm.assert_dict_equal(groups, exp) - - # GH5620 - groups = idx.groupby(idx) - exp = {key: [key] for key in idx} - tm.assert_dict_equal(groups, exp) - - -def test_truncate(): - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - - result = index.truncate(before=1) - assert 'foo' not in result.levels[0] - assert 1 in result.levels[0] - - result = index.truncate(after=1) - assert 2 not in result.levels[0] - assert 1 in result.levels[0] - - result = index.truncate(before=1, after=2) - assert len(result.levels[0]) == 2 - - # after < before - pytest.raises(ValueError, index.truncate, 3, 1) - - -def test_where(): - i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - - def f(): - i.where(True) - - pytest.raises(NotImplementedError, f) - - -def test_where_array_like(): - i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - klasses = [list, tuple, np.array, pd.Series] - cond = [False, True] - - for klass in klasses: - def f(): - return i.where(klass(cond)) - pytest.raises(NotImplementedError, f) - - -def test_reorder_levels(idx): - # this blows up - tm.assert_raises_regex(IndexError, '^Too many levels', - idx.reorder_levels, [2, 1, 0]) - - -def test_astype(idx): - expected = idx.copy() - actual = idx.astype('O') - assert_copy(actual.levels, expected.levels) - assert_copy(actual.labels, expected.labels) - check_level_names(actual, expected.names) - - with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): - idx.astype(np.dtype(int)) - - -@pytest.mark.parametrize('ordered', [True, False]) -def test_astype_category(idx, ordered): - # GH 18630 - msg = '> 1 ndim Categorical are not supported at this time' - with tm.assert_raises_regex(NotImplementedError, msg): - idx.astype(CategoricalDtype(ordered=ordered)) - - if ordered is False: - # dtype='category' defaults to ordered=False, so only test once - with tm.assert_raises_regex(NotImplementedError, msg): - idx.astype('category') - - -def test_repeat(): - reps = 2 - numbers = [1, 2, 3] - names = np.array(['foo', 'bar']) - - m = MultiIndex.from_product([ - numbers, names], names=names) - expected = MultiIndex.from_product([ - numbers, names.repeat(reps)], names=names) - tm.assert_index_equal(m.repeat(reps), expected) - - with tm.assert_produces_warning(FutureWarning): - result = m.repeat(n=reps) - tm.assert_index_equal(result, expected) - - -def test_numpy_repeat(): - reps = 2 - numbers = [1, 2, 3] - names = np.array(['foo', 'bar']) - - m = MultiIndex.from_product([ - numbers, names], names=names) - expected = MultiIndex.from_product([ - numbers, names.repeat(reps)], names=names) - tm.assert_index_equal(np.repeat(m, reps), expected) - - msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex( - ValueError, msg, np.repeat, m, reps, axis=1) - - -def test_append_mixed_dtypes(): - # GH 13660 - dti = date_range('2011-01-01', freq='M', periods=3, ) - dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern') - pi = period_range('2011-01', freq='M', periods=3) - - mi = MultiIndex.from_arrays([[1, 2, 3], - [1.1, np.nan, 3.3], - ['a', 'b', 'c'], - dti, dti_tz, pi]) - assert mi.nlevels == 6 - - res = mi.append(mi) - exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], - [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], - ['a', 'b', 'c', 'a', 'b', 'c'], - dti.append(dti), - dti_tz.append(dti_tz), - pi.append(pi)]) - tm.assert_index_equal(res, exp) - - other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'], - ['x', 'y', 'z'], ['x', 'y', 'z'], - ['x', 'y', 'z'], ['x', 'y', 'z']]) - - res = mi.append(other) - exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'], - [1.1, np.nan, 3.3, 'x', 'y', 'z'], - ['a', 'b', 'c', 'x', 'y', 'z'], - dti.append(pd.Index(['x', 'y', 'z'])), - dti_tz.append(pd.Index(['x', 'y', 'z'])), - pi.append(pd.Index(['x', 'y', 'z']))]) - tm.assert_index_equal(res, exp) - - -def test_take(idx): - indexer = [4, 3, 0, 2] - result = idx.take(indexer) - expected = idx[indexer] - assert result.equals(expected) - - if not isinstance(idx, - (DatetimeIndex, PeriodIndex, TimedeltaIndex)): - # GH 10791 - with pytest.raises(AttributeError): - idx.freq - - -def test_take_invalid_kwargs(idx): - idx = idx - indices = [1, 2] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') - - -def test_take_fill_value(): - # GH 12631 - vals = [['A', 'B'], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]] - idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) - - result = idx.take(np.array([1, 0, -1])) - exp_vals = [('A', pd.Timestamp('2011-01-02')), - ('A', pd.Timestamp('2011-01-01')), - ('B', pd.Timestamp('2011-01-02'))] - expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - exp_vals = [('A', pd.Timestamp('2011-01-02')), - ('A', pd.Timestamp('2011-01-01')), - (np.nan, pd.NaT)] - expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - exp_vals = [('A', pd.Timestamp('2011-01-02')), - ('A', pd.Timestamp('2011-01-01')), - ('B', pd.Timestamp('2011-01-02'))] - expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with pytest.raises(IndexError): - idx.take(np.array([1, -5])) - - -def test_iter(idx): - result = list(idx) - expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), - ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] - assert result == expected - - -def test_sub(idx): - - first = idx - - # - now raises (previously was set op difference) - with pytest.raises(TypeError): - first - idx[-3:] - with pytest.raises(TypeError): - idx[-3:] - first - with pytest.raises(TypeError): - idx[-3:] - first.tolist() - with pytest.raises(TypeError): - first.tolist() - idx[-3:] - - -def test_argsort(idx): - result = idx.argsort() - expected = idx.values.argsort() - tm.assert_numpy_array_equal(result, expected) - - -def test_map(idx): - # callable - index = idx - - # we don't infer UInt64 - if isinstance(index, pd.UInt64Index): - expected = index.astype('int64') - else: - expected = index - - result = index.map(lambda x: x) - tm.assert_index_equal(result, expected) - - -@pytest.mark.parametrize( - "mapper", - [ - lambda values, idx: {i: e for e, i in zip(values, idx)}, - lambda values, idx: pd.Series(values, idx)]) -def test_map_dictlike(idx, mapper): - - if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)): - pytest.skip("skipping tests for {}".format(type(idx))) - - identity = mapper(idx.values, idx) - - # we don't infer to UInt64 for a dict - if isinstance(idx, pd.UInt64Index) and isinstance(identity, dict): - expected = idx.astype('int64') - else: - expected = idx - - result = idx.map(identity) - tm.assert_index_equal(result, expected) - - # empty mappable - expected = pd.Index([np.nan] * len(idx)) - result = idx.map(mapper(expected, idx)) - tm.assert_index_equal(result, expected) - - -def test_numpy_ufuncs(idx): - # test ufuncs of numpy 1.9.2. see: - # http://docs.scipy.org/doc/numpy/reference/ufuncs.html - - # some functions are skipped because it may return different result - # for unicode input depending on numpy version - - for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, - np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, - np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, - np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, - np.rad2deg]: - if isinstance(idx, DatetimeIndexOpsMixin): - # raise TypeError or ValueError (PeriodIndex) - # PeriodIndex behavior should be changed in future version - with pytest.raises(Exception): - with np.errstate(all='ignore'): - func(idx) - elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): - # coerces to float (e.g. np.sin) - with np.errstate(all='ignore'): - result = func(idx) - exp = Index(func(idx.values), name=idx.name) - - tm.assert_index_equal(result, exp) - assert isinstance(result, pd.Float64Index) - else: - # raise AttributeError or TypeError - if len(idx) == 0: - continue - else: - with pytest.raises(Exception): - with np.errstate(all='ignore'): - func(idx) - - for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: - if isinstance(idx, DatetimeIndexOpsMixin): - # raise TypeError or ValueError (PeriodIndex) - with pytest.raises(Exception): - func(idx) - elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): - # Results in bool array - result = func(idx) - assert isinstance(result, np.ndarray) - assert not isinstance(result, Index) - else: - if len(idx) == 0: - continue - else: - with pytest.raises(Exception): - func(idx) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py new file mode 100644 index 0000000000000..85eec6a232180 --- /dev/null +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas import Index, MultiIndex + + +def test_insert(idx): + # key contained in all levels + new_index = idx.insert(0, ('bar', 'two')) + assert new_index.equal_levels(idx) + assert new_index[0] == ('bar', 'two') + + # key not contained in all levels + new_index = idx.insert(0, ('abc', 'three')) + + exp0 = Index(list(idx.levels[0]) + ['abc'], name='first') + tm.assert_index_equal(new_index.levels[0], exp0) + + exp1 = Index(list(idx.levels[1]) + ['three'], name='second') + tm.assert_index_equal(new_index.levels[1], exp1) + assert new_index[0] == ('abc', 'three') + + # key wrong length + msg = "Item must have length equal to number of levels" + with tm.assert_raises_regex(ValueError, msg): + idx.insert(0, ('foo2',)) + + left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], + columns=['1st', '2nd', '3rd']) + left.set_index(['1st', '2nd'], inplace=True) + ts = left['3rd'].copy(deep=True) + + left.loc[('b', 'x'), '3rd'] = 2 + left.loc[('b', 'a'), '3rd'] = -1 + left.loc[('b', 'b'), '3rd'] = 3 + left.loc[('a', 'x'), '3rd'] = 4 + left.loc[('a', 'w'), '3rd'] = 5 + left.loc[('a', 'a'), '3rd'] = 6 + + ts.loc[('b', 'x')] = 2 + ts.loc['b', 'a'] = -1 + ts.loc[('b', 'b')] = 3 + ts.loc['a', 'x'] = 4 + ts.loc[('a', 'w')] = 5 + ts.loc['a', 'a'] = 6 + + right = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1], ['b', 'x', 2], + ['b', 'a', -1], ['b', 'b', 3], ['a', 'x', 4], + ['a', 'w', 5], ['a', 'a', 6]], + columns=['1st', '2nd', '3rd']) + right.set_index(['1st', '2nd'], inplace=True) + # FIXME data types changes to float because + # of intermediate nan insertion; + tm.assert_frame_equal(left, right, check_dtype=False) + tm.assert_series_equal(ts, right['3rd']) + + # GH9250 + idx = [('test1', i) for i in range(5)] + \ + [('test2', i) for i in range(6)] + \ + [('test', 17), ('test', 18)] + + left = pd.Series(np.linspace(0, 10, 11), + pd.MultiIndex.from_tuples(idx[:-2])) + + left.loc[('test', 17)] = 11 + left.loc[('test', 18)] = 12 + + right = pd.Series(np.linspace(0, 12, 13), + pd.MultiIndex.from_tuples(idx)) + + tm.assert_series_equal(left, right) + + +def test_append(idx): + result = idx[:3].append(idx[3:]) + assert result.equals(idx) + + foos = [idx[:1], idx[1:3], idx[3:]] + result = foos[0].append(foos[1:]) + assert result.equals(idx) + + # empty + result = idx.append([]) + assert result.equals(idx) + + +def test_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(['foo', 'bar']) + + m = MultiIndex.from_product([ + numbers, names], names=names) + expected = MultiIndex.from_product([ + numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(m.repeat(reps), expected) + + with tm.assert_produces_warning(FutureWarning): + result = m.repeat(n=reps) + tm.assert_index_equal(result, expected) + + +def test_insert_base(idx): + + result = idx[1:4] + + # test 0th element + assert idx[0:4].equals(result.insert(0, idx[0])) + + +def test_delete_base(idx): + + expected = idx[1:] + result = idx.delete(0) + assert result.equals(expected) + assert result.name == expected.name + + expected = idx[:-1] + result = idx.delete(-1) + assert result.equals(expected) + assert result.name == expected.name + + with pytest.raises((IndexError, ValueError)): + # either depending on numpy version + result = idx.delete(len(idx)) diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 79a3837aac7f8..3f61cf2b6ff3f 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -1,11 +1,9 @@ # -*- coding: utf-8 -*- - import numpy as np import pandas as pd import pandas.util.testing as tm -from pandas import (CategoricalIndex, DatetimeIndex, MultiIndex, PeriodIndex, - Series, TimedeltaIndex) +from pandas import MultiIndex, Series def test_setops_errorcases(idx): @@ -27,29 +25,18 @@ def test_intersection_base(idx): second = idx[:3] intersect = first.intersection(second) - if isinstance(idx, CategoricalIndex): - pass - else: - assert tm.equalContents(intersect, second) + assert tm.equalContents(intersect, second) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.intersection(case) - elif isinstance(idx, CategoricalIndex): - pass - else: - result = first.intersection(case) - assert tm.equalContents(result, second) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.intersection([1, 2, 3]) + result = first.intersection(case) + assert tm.equalContents(result, second) + + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.intersection([1, 2, 3]) def test_union_base(idx): @@ -63,20 +50,12 @@ def test_union_base(idx): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.union(case) - elif isinstance(idx, CategoricalIndex): - pass - else: - result = first.union(case) - assert tm.equalContents(result, everything) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.union([1, 2, 3]) + result = first.union(case) + assert tm.equalContents(result, everything) + + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.union([1, 2, 3]) def test_difference_base(idx): @@ -85,63 +64,37 @@ def test_difference_base(idx): answer = idx[4:] result = first.difference(second) - if isinstance(idx, CategoricalIndex): - pass - else: - assert tm.equalContents(result, answer) + assert tm.equalContents(result, answer) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.difference(case) - elif isinstance(idx, CategoricalIndex): - pass - elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): - assert result.__class__ == answer.__class__ - tm.assert_numpy_array_equal(result.sort_values().asi8, - answer.sort_values().asi8) - else: - result = first.difference(case) - assert tm.equalContents(result, answer) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.difference([1, 2, 3]) + result = first.difference(case) + assert tm.equalContents(result, answer) + + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.difference([1, 2, 3]) def test_symmetric_difference(idx): first = idx[1:] second = idx[:-1] - if isinstance(idx, CategoricalIndex): - pass - else: - answer = idx[[0, -1]] - result = first.symmetric_difference(second) - assert tm.equalContents(result, answer) + answer = idx[[0, -1]] + result = first.symmetric_difference(second) + assert tm.equalContents(result, answer) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.symmetric_difference(case) - elif isinstance(idx, CategoricalIndex): - pass - else: - result = first.symmetric_difference(case) - assert tm.equalContents(result, answer) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - first.symmetric_difference([1, 2, 3]) + result = first.symmetric_difference(case) + assert tm.equalContents(result, answer) + + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + first.symmetric_difference([1, 2, 3]) def test_empty(idx): diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index d6165c17c6717..ee29ea1be8aea 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -215,7 +215,8 @@ def test_reconstruct_remove_unused(): @pytest.mark.parametrize('first_type,second_type', [ ('int64', 'int64'), - ('datetime64[D]', 'str')]) + ('datetime64[D]', 'str') +]) def test_remove_unused_levels_large(first_type, second_type): # GH16556 @@ -254,3 +255,9 @@ def test_remove_unused_nan(level0, level1): tm.assert_index_equal(result, mi) for level in 0, 1: assert('unused' not in result.levels[level]) + + +def test_argsort(idx): + result = idx.argsort() + expected = idx.values.argsort() + tm.assert_numpy_array_equal(result, expected)