diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py new file mode 100644 index 0000000000000..0ff499155f0c4 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest + +from pandas.compat import lrange, lzip, range + +from pandas import DataFrame, MultiIndex, Series +from pandas.core import common as com +import pandas.util.testing as tm + + +def test_detect_chained_assignment(): + # Inplace ops, originally from: + # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug + a = [12, 23] + b = [123, None] + c = [1234, 2345] + d = [12345, 23456] + tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), + ('ears', 'right')] + events = {('eyes', 'left'): a, + ('eyes', 'right'): b, + ('ears', 'left'): c, + ('ears', 'right'): d} + multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) + zed = DataFrame(events, index=['a', 'b'], columns=multiind) + + with pytest.raises(com.SettingWithCopyError): + zed['eyes']['right'].fillna(value=555, inplace=True) + + +def test_cache_updating(): + # 5216 + # make sure that we don't try to set a dead cache + a = np.random.rand(10, 3) + df = DataFrame(a, columns=['x', 'y', 'z']) + tuples = [(i, j) for i in range(5) for j in range(2)] + index = MultiIndex.from_tuples(tuples) + df.index = index + + # setting via chained assignment + # but actually works, since everything is a view + df.loc[0]['z'].iloc[0] = 1. + result = df.loc[(0, 0), 'z'] + assert result == 1 + + # correct setting + df.loc[(0, 0), 'z'] = 2 + result = df.loc[(0, 0), 'z'] + assert result == 2 + + +def test_indexer_caching(): + # GH5727 + # make sure that indexers are in the _internal_names_set + n = 1000001 + arrays = [lrange(n), lrange(n)] + index = MultiIndex.from_tuples(lzip(*arrays)) + s = Series(np.zeros(n), index=index) + str(s) + + # setitem + expected = Series(np.ones(n), index=index) + s = Series(np.zeros(n), index=index) + s[s == 0] = 1 + tm.assert_series_equal(s, expected) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 6354757de9bb5..8d3997c878b83 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.compat import lrange, range, u, zip +from pandas.compat import StringIO, lrange, range, u, zip import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series @@ -343,3 +343,43 @@ def test_mixed_depth_get(unicode_strings): expected = df['routine1', 'result1', ''] expected = expected.rename(('routine1', 'result1')) tm.assert_series_equal(result, expected) + + +def test_mi_access(): + + # GH 4145 + data = """h1 main h3 sub h5 +0 a A 1 A1 1 +1 b B 2 B1 2 +2 c B 3 A1 3 +3 d A 4 B2 4 +4 e A 5 B2 5 +5 f B 6 A2 6 +""" + + df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0) + df2 = df.set_index(['main', 'sub']).T.sort_index(1) + index = Index(['h1', 'h3', 'h5']) + columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub']) + expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T + + result = df2.loc[:, ('A', 'A1')] + tm.assert_frame_equal(result, expected) + + result = df2[('A', 'A1')] + tm.assert_frame_equal(result, expected) + + # GH 4146, not returning a block manager when selecting a unique index + # from a duplicate index + # as of 4879, this returns a Series (which is similar to what happens + # with a non-unique) + expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1') + result = df2['A']['A1'] + tm.assert_series_equal(result, expected) + + # selecting a non_unique from the 2nd level + expected = DataFrame([['d', 4, 4], ['e', 5, 5]], + index=Index(['B2', 'B2'], name='sub'), + columns=['h1', 'h3', 'h5'], ).T + result = df2['A']['B2'] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py index b72b2aedf2037..a1681c1239aa3 100644 --- a/pandas/tests/indexing/multiindex/test_iloc.py +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -115,3 +115,30 @@ def test_iloc_integer_locations(): result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)]) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + 'data, indexes, values, expected_k', [ + # test without indexer value in first level of MultiIndex + ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]), + # test like code sample 1 in the issue + ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], + [755, 1066]), + # test like code sample 2 in the issue + ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]), + # test like code sample 3 in the issue + ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], + [8, 15, 13]) + ]) +def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k): + # GH17148 + df = DataFrame(data=data, columns=['i', 'j', 'k']) + df = df.set_index(['i', 'j']) + + series = df.k.copy() + for i, v in zip(indexes, values): + series.iloc[i] += v + + df['k'] = expected_k + expected = df.k + tm.assert_series_equal(series, expected) diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py new file mode 100644 index 0000000000000..1fdd42e307733 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- + +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, MultiIndex, Series +import pandas.util.testing as tm + + +@pytest.mark.slow +@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") +def test_multiindex_get_loc(): # GH7724, GH2646 + + with warnings.catch_warnings(record=True): + + # test indexing into a multi-index before & past the lexsort depth + from numpy.random import randint, choice, randn + cols = ['jim', 'joe', 'jolie', 'joline', 'jolia'] + + def validate(mi, df, key): + mask = np.ones(len(df)).astype('bool') + + # test for all partials of this key + for i, k in enumerate(key): + mask &= df.iloc[:, i] == k + + if not mask.any(): + assert key[:i + 1] not in mi.index + continue + + assert key[:i + 1] in mi.index + right = df[mask].copy() + + if i + 1 != len(key): # partial key + right.drop(cols[:i + 1], axis=1, inplace=True) + right.set_index(cols[i + 1:-1], inplace=True) + tm.assert_frame_equal(mi.loc[key[:i + 1]], right) + + else: # full key + right.set_index(cols[:-1], inplace=True) + if len(right) == 1: # single hit + right = Series(right['jolia'].values, + name=right.index[0], + index=['jolia']) + tm.assert_series_equal(mi.loc[key[:i + 1]], right) + else: # multi hit + tm.assert_frame_equal(mi.loc[key[:i + 1]], right) + + def loop(mi, df, keys): + for key in keys: + validate(mi, df, key) + + n, m = 1000, 50 + + vals = [randint(0, 10, n), choice( + list('abcdefghij'), n), choice( + pd.date_range('20141009', periods=10).tolist(), n), choice( + list('ZYXWVUTSRQ'), n), randn(n)] + vals = list(map(tuple, zip(*vals))) + + # bunch of keys for testing + keys = [randint(0, 11, m), choice( + list('abcdefghijk'), m), choice( + pd.date_range('20141009', periods=11).tolist(), m), choice( + list('ZYXWVUTSRQP'), m)] + keys = list(map(tuple, zip(*keys))) + keys += list(map(lambda t: t[:-1], vals[::n // m])) + + # covers both unique index and non-unique index + df = DataFrame(vals, columns=cols) + a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1]) + + for frame in a, b: + for i in range(5): # lexsort depth + df = frame.copy() if i == 0 else frame.sort_values( + by=cols[:i]) + mi = df.set_index(cols[:-1]) + assert not mi.index.lexsort_depth < i + loop(mi, df, keys) + + +@pytest.mark.slow +def test_large_mi_dataframe_indexing(): + # GH10645 + result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)]) + assert (not (10 ** 6, 0) in result) diff --git a/pandas/tests/indexing/multiindex/test_ix.py b/pandas/tests/indexing/multiindex/test_ix.py index 4e4e5674fdbd5..4970190252e30 100644 --- a/pandas/tests/indexing/multiindex/test_ix.py +++ b/pandas/tests/indexing/multiindex/test_ix.py @@ -3,6 +3,10 @@ import pytest from pandas.compat import lrange +from pandas.errors import PerformanceWarning + +from pandas import DataFrame, MultiIndex +from pandas.util import testing as tm @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") @@ -25,3 +29,28 @@ def test_frame_setitem_ix(self, multiindex_dataframe_random_data): df.columns = lrange(3) df.ix[('bar', 'two'), 1] = 7 assert df.loc[('bar', 'two'), 1] == 7 + + def test_ix_general(self): + + # ix general issues + + # GH 2817 + data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, + 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} + df = DataFrame(data).set_index(keys=['col', 'year']) + key = 4.0, 2012 + + # emits a PerformanceWarning, ok + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) + + # this is ok + df.sort_index(inplace=True) + res = df.loc[key] + + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3], + names=['col', 'year']) + expected = DataFrame({'amount': [222, 333, 444]}, index=index) + tm.assert_frame_equal(res, expected) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 47a46bc05d0d9..75995a24a2ad1 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -1,9 +1,10 @@ +import itertools from warnings import catch_warnings import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series +from pandas import DataFrame, Index, MultiIndex, Series from pandas.util import testing as tm @@ -175,3 +176,74 @@ def test_get_loc_single_level(self, single_level_multiindex): index=single_level) for k in single_level.values: s[k] + + def test_loc_getitem_int_slice(self): + # GH 3053 + # loc should treat integer slices like label slices + + index = MultiIndex.from_tuples([t for t in itertools.product( + [6, 7, 8], ['a', 'b'])]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[6:8, :] + expected = df + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_tuples([t + for t in itertools.product( + [10, 20, 30], ['a', 'b'])]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[20:30, :] + expected = df.iloc[2:] + tm.assert_frame_equal(result, expected) + + # doc examples + result = df.loc[10, :] + expected = df.iloc[0:2] + expected.index = ['a', 'b'] + tm.assert_frame_equal(result, expected) + + result = df.loc[:, 10] + # expected = df.ix[:,10] (this fails) + expected = df[10] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + 'indexer_type_1', + (list, tuple, set, slice, np.ndarray, Series, Index)) + @pytest.mark.parametrize( + 'indexer_type_2', + (list, tuple, set, slice, np.ndarray, Series, Index)) + def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2): + # GH #19686 + # .loc should work with nested indexers which can be + # any list-like objects (see `pandas.api.types.is_list_like`) or slices + + def convert_nested_indexer(indexer_type, keys): + if indexer_type == np.ndarray: + return np.array(keys) + if indexer_type == slice: + return slice(*keys) + return indexer_type(keys) + + a = [10, 20, 30] + b = [1, 2, 3] + index = MultiIndex.from_product([a, b]) + df = DataFrame( + np.arange(len(index), dtype='int64'), + index=index, columns=['Data']) + + keys = ([10, 20], [2, 3]) + types = (indexer_type_1, indexer_type_2) + + # check indexers with all the combinations of nested objects + # of all the valid types + indexer = tuple( + convert_nested_indexer(indexer_type, k) + for indexer_type, k in zip(types, keys)) + + result = df.loc[indexer, 'Data'] + expected = Series( + [1, 2, 4, 5], name='Data', + index=MultiIndex.from_product(keys)) + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 1fefbc0b0f8ca..4f5517f89e852 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -6,7 +6,7 @@ from pandas.errors import PerformanceWarning import pandas as pd -from pandas import DataFrame, MultiIndex, Series +from pandas import DataFrame, Index, MultiIndex, Series from pandas.util import testing as tm @@ -69,3 +69,18 @@ def test_indexing_over_hashtable_size_cutoff(self): assert s[("a", 7)] == 7 _index._SIZE_CUTOFF = old_cutoff + + def test_multi_nan_indexing(self): + + # GH 3588 + df = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'], + 'b': ["C1", "C2", "C3", "C4"], + "c": [10, 15, np.nan, 20]}) + result = df.set_index(['a', 'b'], drop=False) + expected = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'], + 'b': ["C1", "C2", "C3", "C4"], + "c": [10, 15, np.nan, 20]}, + index=[Index(['R1', 'R2', np.nan, 'R4'], + name='a'), + Index(['C1', 'C2', 'C3', 'C4'], name='b')]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 91ffd308e6793..d49ca34edd0fd 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -396,3 +396,15 @@ def test_nonunique_assignment_1750(self): df.loc[ix, "C"] = '_' assert (df.xs((1, 1))['C'] == '_').all() + + def test_astype_assignment_with_dups(self): + + # GH 4686 + # assignment with dups that has a dtype change + cols = MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')]) + df = DataFrame(np.arange(3).reshape((1, 3)), + columns=cols, dtype=object) + index = df.index.copy() + + df['A'] = df['A'].astype(np.float64) + tm.assert_index_equal(df.index, index) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index f012c9c255cd9..e38c1b16b3b60 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -3,8 +3,7 @@ import pandas as pd from pandas import ( - DataFrame, MultiIndex, Series, Timestamp, compat, date_range, - option_context) + DataFrame, Series, Timestamp, compat, date_range, option_context) from pandas.core import common as com from pandas.util import testing as tm @@ -253,24 +252,6 @@ def random_text(nobs=100): assert df._is_copy is None df['a'] += 1 - # Inplace ops, originally from: - # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug - a = [12, 23] - b = [123, None] - c = [1234, 2345] - d = [12345, 23456] - tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), - ('ears', 'right')] - events = {('eyes', 'left'): a, - ('eyes', 'right'): b, - ('ears', 'left'): c, - ('ears', 'right'): d} - multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) - zed = DataFrame(events, index=['a', 'b'], columns=multiind) - - with pytest.raises(com.SettingWithCopyError): - zed['eyes']['right'].fillna(value=555, inplace=True) - df = DataFrame(np.random.randn(10, 4)) s = df.iloc[:, 0].sort_values() @@ -392,25 +373,6 @@ def test_cache_updating(self): assert "A+1" in panel.ix[0].columns assert "A+1" in panel.ix[1].columns - # 5216 - # make sure that we don't try to set a dead cache - a = np.random.rand(10, 3) - df = DataFrame(a, columns=['x', 'y', 'z']) - tuples = [(i, j) for i in range(5) for j in range(2)] - index = MultiIndex.from_tuples(tuples) - df.index = index - - # setting via chained assignment - # but actually works, since everything is a view - df.loc[0]['z'].iloc[0] = 1. - result = df.loc[(0, 0), 'z'] - assert result == 1 - - # correct setting - df.loc[(0, 0), 'z'] = 2 - result = df.loc[(0, 0), 'z'] - assert result == 2 - # 10264 df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[ 'a', 'b', 'c', 'd', 'e'], index=range(5)) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 85b06001cf8a0..a867387db4b46 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -296,33 +296,6 @@ def test_iloc_setitem(self): expected = Series([0, 1, 0], index=[4, 5, 6]) tm.assert_series_equal(s, expected) - @pytest.mark.parametrize( - 'data, indexes, values, expected_k', [ - # test without indexer value in first level of MultiIndex - ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]), - # test like code sample 1 in the issue - ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], - [755, 1066]), - # test like code sample 2 in the issue - ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]), - # test like code sample 3 in the issue - ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], - [8, 15, 13]) - ]) - def test_iloc_setitem_int_multiindex_series( - self, data, indexes, values, expected_k): - # GH17148 - df = DataFrame(data=data, columns=['i', 'j', 'k']) - df = df.set_index(['i', 'j']) - - series = df.k.copy() - for i, v in zip(indexes, values): - series.iloc[i] += v - - df['k'] = expected_k - expected = df.k - tm.assert_series_equal(series, expected) - def test_iloc_setitem_list(self): # setitem with an iloc list diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 3c36dee310a28..03f1975c50d2a 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -10,12 +10,12 @@ import numpy as np import pytest -from pandas.compat import PY2, StringIO, lrange, lzip, range +from pandas.compat import PY2, lrange, range from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas as pd -from pandas import DataFrame, Index, MultiIndex, NaT, Series +from pandas import DataFrame, Index, NaT, Series from pandas.core.indexing import ( _maybe_numeric_slice, _non_reducing_slice, validate_indices) from pandas.tests.indexing.common import Base, _mklbl @@ -337,21 +337,6 @@ def test_set_index_nan(self): columns=df.columns) tm.assert_frame_equal(result, df) - def test_multi_nan_indexing(self): - - # GH 3588 - df = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'], - 'b': ["C1", "C2", "C3", "C4"], - "c": [10, 15, np.nan, 20]}) - result = df.set_index(['a', 'b'], drop=False) - expected = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'], - 'b': ["C1", "C2", "C3", "C4"], - "c": [10, 15, np.nan, 20]}, - index=[Index(['R1', 'R2', np.nan, 'R4'], - name='a'), - Index(['C1', 'C2', 'C3', 'C4'], name='b')]) - tm.assert_frame_equal(result, expected) - def test_multi_assign(self): # GH 3626, an assignment of a sub-df to a df @@ -482,45 +467,6 @@ def test_string_slice(self): with pytest.raises(KeyError): df.loc['2011', 0] - def test_mi_access(self): - - # GH 4145 - data = """h1 main h3 sub h5 -0 a A 1 A1 1 -1 b B 2 B1 2 -2 c B 3 A1 3 -3 d A 4 B2 4 -4 e A 5 B2 5 -5 f B 6 A2 6 -""" - - df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0) - df2 = df.set_index(['main', 'sub']).T.sort_index(1) - index = Index(['h1', 'h3', 'h5']) - columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub']) - expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T - - result = df2.loc[:, ('A', 'A1')] - tm.assert_frame_equal(result, expected) - - result = df2[('A', 'A1')] - tm.assert_frame_equal(result, expected) - - # GH 4146, not returning a block manager when selecting a unique index - # from a duplicate index - # as of 4879, this returns a Series (which is similar to what happens - # with a non-unique) - expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1') - result = df2['A']['A1'] - tm.assert_series_equal(result, expected) - - # selecting a non_unique from the 2nd level - expected = DataFrame([['d', 4, 4], ['e', 5, 5]], - index=Index(['B2', 'B2'], name='sub'), - columns=['h1', 'h3', 'h5'], ).T - result = df2['A']['B2'] - tm.assert_frame_equal(result, expected) - def test_astype_assignment(self): # GH4312 (iloc) @@ -563,22 +509,6 @@ def test_astype_assignment(self): expected = DataFrame({'A': [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) - def test_astype_assignment_with_dups(self): - - # GH 4686 - # assignment with dups that has a dtype change - cols = MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')]) - df = DataFrame(np.arange(3).reshape((1, 3)), - columns=cols, dtype=object) - index = df.index.copy() - - df['A'] = df['A'].astype(np.float64) - tm.assert_index_equal(df.index, index) - - # TODO(wesm): unused variables - # result = df.get_dtype_counts().sort_index() - # expected = Series({'float64': 2, 'object': 1}).sort_index() - @pytest.mark.parametrize("index,val", [ (Index([0, 1, 2]), 2), (Index([0, 1, '2']), '2'), @@ -698,21 +628,6 @@ def test_index_type_coercion(self): class TestMisc(Base): - def test_indexer_caching(self): - # GH5727 - # make sure that indexers are in the _internal_names_set - n = 1000001 - arrays = [lrange(n), lrange(n)] - index = MultiIndex.from_tuples(lzip(*arrays)) - s = Series(np.zeros(n), index=index) - str(s) - - # setitem - expected = Series(np.ones(n), index=index) - s = Series(np.zeros(n), index=index) - s[s == 0] = 1 - tm.assert_series_equal(s, expected) - def test_float_index_to_mixed(self): df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) df['a'] = 10 diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py index 5fd1431ac210c..42263c813ddab 100644 --- a/pandas/tests/indexing/test_indexing_slow.py +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -1,88 +1,13 @@ # -*- coding: utf-8 -*- -import warnings - -import numpy as np import pytest -import pandas as pd -from pandas.core.api import DataFrame, MultiIndex, Series +from pandas import DataFrame import pandas.util.testing as tm class TestIndexingSlow(object): - @pytest.mark.slow - @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") - def test_multiindex_get_loc(self): # GH7724, GH2646 - - with warnings.catch_warnings(record=True): - - # test indexing into a multi-index before & past the lexsort depth - from numpy.random import randint, choice, randn - cols = ['jim', 'joe', 'jolie', 'joline', 'jolia'] - - def validate(mi, df, key): - mask = np.ones(len(df)).astype('bool') - - # test for all partials of this key - for i, k in enumerate(key): - mask &= df.iloc[:, i] == k - - if not mask.any(): - assert key[:i + 1] not in mi.index - continue - - assert key[:i + 1] in mi.index - right = df[mask].copy() - - if i + 1 != len(key): # partial key - right.drop(cols[:i + 1], axis=1, inplace=True) - right.set_index(cols[i + 1:-1], inplace=True) - tm.assert_frame_equal(mi.loc[key[:i + 1]], right) - - else: # full key - right.set_index(cols[:-1], inplace=True) - if len(right) == 1: # single hit - right = Series(right['jolia'].values, - name=right.index[0], - index=['jolia']) - tm.assert_series_equal(mi.loc[key[:i + 1]], right) - else: # multi hit - tm.assert_frame_equal(mi.loc[key[:i + 1]], right) - - def loop(mi, df, keys): - for key in keys: - validate(mi, df, key) - - n, m = 1000, 50 - - vals = [randint(0, 10, n), choice( - list('abcdefghij'), n), choice( - pd.date_range('20141009', periods=10).tolist(), n), choice( - list('ZYXWVUTSRQ'), n), randn(n)] - vals = list(map(tuple, zip(*vals))) - - # bunch of keys for testing - keys = [randint(0, 11, m), choice( - list('abcdefghijk'), m), choice( - pd.date_range('20141009', periods=11).tolist(), m), choice( - list('ZYXWVUTSRQP'), m)] - keys = list(map(tuple, zip(*keys))) - keys += list(map(lambda t: t[:-1], vals[::n // m])) - - # covers both unique index and non-unique index - df = DataFrame(vals, columns=cols) - a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1]) - - for frame in a, b: - for i in range(5): # lexsort depth - df = frame.copy() if i == 0 else frame.sort_values( - by=cols[:i]) - mi = df.set_index(cols[:-1]) - assert not mi.index.lexsort_depth < i - loop(mi, df, keys) - @pytest.mark.slow def test_large_dataframe_indexing(self): # GH10692 @@ -90,9 +15,3 @@ def test_large_dataframe_indexing(self): result.loc[len(result)] = len(result) + 1 expected = DataFrame({'x': range(10 ** 6 + 1)}, dtype='int64') tm.assert_frame_equal(result, expected) - - @pytest.mark.slow - def test_large_mi_dataframe_indexing(self): - # GH10645 - result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)]) - assert (not (10 ** 6, 0) in result) diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index 850f80241a477..35805bce07705 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -6,12 +6,11 @@ import pytest from pandas.compat import lrange -from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import is_scalar import pandas as pd -from pandas import DataFrame, MultiIndex, Series, option_context +from pandas import DataFrame, Series, option_context from pandas.util import testing as tm @@ -179,31 +178,6 @@ def test_ix_weird_slicing(self): 4: 5}}) tm.assert_frame_equal(df, expected) - def test_ix_general(self): - - # ix general issues - - # GH 2817 - data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, - 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, - 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} - df = DataFrame(data).set_index(keys=['col', 'year']) - key = 4.0, 2012 - - # emits a PerformanceWarning, ok - with tm.assert_produces_warning(PerformanceWarning): - tm.assert_frame_equal(df.loc[key], df.iloc[2:]) - - # this is ok - df.sort_index(inplace=True) - res = df.loc[key] - - # col has float dtype, result should be Float64Index - index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3], - names=['col', 'year']) - expected = DataFrame({'amount': [222, 333, 444]}, index=index) - tm.assert_frame_equal(res, expected) - def test_ix_assign_column_mixed(self): # GH #1142 df = DataFrame(tm.getSeriesData()) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 21bb624790328..17e107c7a1130 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,6 +1,5 @@ """ test label based indexing with loc """ -import itertools from warnings import catch_warnings, filterwarnings import numpy as np @@ -9,7 +8,7 @@ from pandas.compat import PY2, StringIO, lrange import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range +from pandas import DataFrame, Series, Timestamp, date_range from pandas.api.types import is_scalar from pandas.tests.indexing.common import Base from pandas.util import testing as tm @@ -226,35 +225,6 @@ def test_loc_getitem_int_slice(self): self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8], typs=['ints', 'uints'], axes=2) - # GH 3053 - # loc should treat integer slices like label slices - - index = MultiIndex.from_tuples([t for t in itertools.product( - [6, 7, 8], ['a', 'b'])]) - df = DataFrame(np.random.randn(6, 6), index, index) - result = df.loc[6:8, :] - expected = df - tm.assert_frame_equal(result, expected) - - index = MultiIndex.from_tuples([t - for t in itertools.product( - [10, 20, 30], ['a', 'b'])]) - df = DataFrame(np.random.randn(6, 6), index, index) - result = df.loc[20:30, :] - expected = df.iloc[2:] - tm.assert_frame_equal(result, expected) - - # doc examples - result = df.loc[10, :] - expected = df.iloc[0:2] - expected.index = ['a', 'b'] - tm.assert_frame_equal(result, expected) - - result = df.loc[:, 10] - # expected = df.ix[:,10] (this fails) - expected = df[10] - tm.assert_frame_equal(result, expected) - def test_loc_to_fail(self): # GH3449 @@ -745,47 +715,6 @@ def test_identity_slice_returns_new_object(self): original_series[:3] = [7, 8, 9] assert all(sliced_series[:3] == [7, 8, 9]) - @pytest.mark.parametrize( - 'indexer_type_1', - (list, tuple, set, slice, np.ndarray, Series, Index)) - @pytest.mark.parametrize( - 'indexer_type_2', - (list, tuple, set, slice, np.ndarray, Series, Index)) - def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2): - # GH #19686 - # .loc should work with nested indexers which can be - # any list-like objects (see `pandas.api.types.is_list_like`) or slices - - def convert_nested_indexer(indexer_type, keys): - if indexer_type == np.ndarray: - return np.array(keys) - if indexer_type == slice: - return slice(*keys) - return indexer_type(keys) - - a = [10, 20, 30] - b = [1, 2, 3] - index = pd.MultiIndex.from_product([a, b]) - df = pd.DataFrame( - np.arange(len(index), dtype='int64'), - index=index, columns=['Data']) - - keys = ([10, 20], [2, 3]) - types = (indexer_type_1, indexer_type_2) - - # check indexers with all the combinations of nested objects - # of all the valid types - indexer = tuple( - convert_nested_indexer(indexer_type, k) - for indexer_type, k in zip(types, keys)) - - result = df.loc[indexer, 'Data'] - expected = pd.Series( - [1, 2, 4, 5], name='Data', - index=pd.MultiIndex.from_product(keys)) - - tm.assert_series_equal(result, expected) - def test_loc_uint64(self): # GH20722 # Test whether loc accept uint64 max value as index.