Skip to content

TST: move mi tests from tests/indexing/ to tests/indexing/multiindex/ #24417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Dec 25, 2018
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions pandas/tests/indexing/multiindex/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import numpy as np
import pytest

from pandas.compat import lrange, lzip, range

from pandas import DataFrame, MultiIndex, Series
from pandas.core import common as com
import pandas.util.testing as tm


def test_detect_chained_assignment():
# Inplace ops, originally from:
# http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
a = [12, 23]
b = [123, None]
c = [1234, 2345]
d = [12345, 23456]
tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'),
('ears', 'right')]
events = {('eyes', 'left'): a,
('eyes', 'right'): b,
('ears', 'left'): c,
('ears', 'right'): d}
multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
zed = DataFrame(events, index=['a', 'b'], columns=multiind)

with pytest.raises(com.SettingWithCopyError):
zed['eyes']['right'].fillna(value=555, inplace=True)


def test_cache_updating():
# 5216
# make sure that we don't try to set a dead cache
a = np.random.rand(10, 3)
df = DataFrame(a, columns=['x', 'y', 'z'])
tuples = [(i, j) for i in range(5) for j in range(2)]
index = MultiIndex.from_tuples(tuples)
df.index = index

# setting via chained assignment
# but actually works, since everything is a view
df.loc[0]['z'].iloc[0] = 1.
result = df.loc[(0, 0), 'z']
assert result == 1

# correct setting
df.loc[(0, 0), 'z'] = 2
result = df.loc[(0, 0), 'z']
assert result == 2


def test_indexer_caching():
# GH5727
# make sure that indexers are in the _internal_names_set
n = 1000001
arrays = [lrange(n), lrange(n)]
index = MultiIndex.from_tuples(lzip(*arrays))
s = Series(np.zeros(n), index=index)
str(s)

# setitem
expected = Series(np.ones(n), index=index)
s = Series(np.zeros(n), index=index)
s[s == 0] = 1
tm.assert_series_equal(s, expected)
42 changes: 41 additions & 1 deletion pandas/tests/indexing/multiindex/test_getitem.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas.compat import lrange, range, u, zip
from pandas.compat import StringIO, lrange, range, u, zip

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series
Expand Down Expand Up @@ -343,3 +343,43 @@ def test_mixed_depth_get(unicode_strings):
expected = df['routine1', 'result1', '']
expected = expected.rename(('routine1', 'result1'))
tm.assert_series_equal(result, expected)


def test_mi_access():

# GH 4145
data = """h1 main h3 sub h5
0 a A 1 A1 1
1 b B 2 B1 2
2 c B 3 A1 3
3 d A 4 B2 4
4 e A 5 B2 5
5 f B 6 A2 6
"""

df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0)
df2 = df.set_index(['main', 'sub']).T.sort_index(1)
index = Index(['h1', 'h3', 'h5'])
columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])
expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T

result = df2.loc[:, ('A', 'A1')]
tm.assert_frame_equal(result, expected)

result = df2[('A', 'A1')]
tm.assert_frame_equal(result, expected)

# GH 4146, not returning a block manager when selecting a unique index
# from a duplicate index
# as of 4879, this returns a Series (which is similar to what happens
# with a non-unique)
expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
result = df2['A']['A1']
tm.assert_series_equal(result, expected)

# selecting a non_unique from the 2nd level
expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
index=Index(['B2', 'B2'], name='sub'),
columns=['h1', 'h3', 'h5'], ).T
result = df2['A']['B2']
tm.assert_frame_equal(result, expected)
27 changes: 27 additions & 0 deletions pandas/tests/indexing/multiindex/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,30 @@ def test_iloc_integer_locations():
result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)])

tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
'data, indexes, values, expected_k', [
# test without indexer value in first level of MultiIndex
([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
# test like code sample 1 in the issue
([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100],
[755, 1066]),
# test like code sample 2 in the issue
([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
# test like code sample 3 in the issue
([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10],
[8, 15, 13])
])
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
# GH17148
df = DataFrame(data=data, columns=['i', 'j', 'k'])
df = df.set_index(['i', 'j'])

series = df.k.copy()
for i, v in zip(indexes, values):
series.iloc[i] += v

df['k'] = expected_k
expected = df.k
tm.assert_series_equal(series, expected)
90 changes: 90 additions & 0 deletions pandas/tests/indexing/multiindex/test_indexing_slow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-

import warnings

import numpy as np
import pytest

import pandas as pd
from pandas.core.api import DataFrame, MultiIndex, Series
import pandas.util.testing as tm


class TestIndexingSlow(object):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this needed to be class based?


@pytest.mark.slow
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_multiindex_get_loc(self): # GH7724, GH2646

with warnings.catch_warnings(record=True):

# test indexing into a multi-index before & past the lexsort depth
from numpy.random import randint, choice, randn
cols = ['jim', 'joe', 'jolie', 'joline', 'jolia']

def validate(mi, df, key):
mask = np.ones(len(df)).astype('bool')

# test for all partials of this key
for i, k in enumerate(key):
mask &= df.iloc[:, i] == k

if not mask.any():
assert key[:i + 1] not in mi.index
continue

assert key[:i + 1] in mi.index
right = df[mask].copy()

if i + 1 != len(key): # partial key
right.drop(cols[:i + 1], axis=1, inplace=True)
right.set_index(cols[i + 1:-1], inplace=True)
tm.assert_frame_equal(mi.loc[key[:i + 1]], right)

else: # full key
right.set_index(cols[:-1], inplace=True)
if len(right) == 1: # single hit
right = Series(right['jolia'].values,
name=right.index[0],
index=['jolia'])
tm.assert_series_equal(mi.loc[key[:i + 1]], right)
else: # multi hit
tm.assert_frame_equal(mi.loc[key[:i + 1]], right)

def loop(mi, df, keys):
for key in keys:
validate(mi, df, key)

n, m = 1000, 50

vals = [randint(0, 10, n), choice(
list('abcdefghij'), n), choice(
pd.date_range('20141009', periods=10).tolist(), n), choice(
list('ZYXWVUTSRQ'), n), randn(n)]
vals = list(map(tuple, zip(*vals)))

# bunch of keys for testing
keys = [randint(0, 11, m), choice(
list('abcdefghijk'), m), choice(
pd.date_range('20141009', periods=11).tolist(), m), choice(
list('ZYXWVUTSRQP'), m)]
keys = list(map(tuple, zip(*keys)))
keys += list(map(lambda t: t[:-1], vals[::n // m]))

# covers both unique index and non-unique index
df = DataFrame(vals, columns=cols)
a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])

for frame in a, b:
for i in range(5): # lexsort depth
df = frame.copy() if i == 0 else frame.sort_values(
by=cols[:i])
mi = df.set_index(cols[:-1])
assert not mi.index.lexsort_depth < i
loop(mi, df, keys)

@pytest.mark.slow
def test_large_mi_dataframe_indexing(self):
# GH10645
result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)])
assert (not (10 ** 6, 0) in result)
29 changes: 29 additions & 0 deletions pandas/tests/indexing/multiindex/test_ix.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import pytest

from pandas.compat import lrange
from pandas.errors import PerformanceWarning

from pandas import DataFrame, MultiIndex
from pandas.util import testing as tm


@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
Expand All @@ -25,3 +29,28 @@ def test_frame_setitem_ix(self, multiindex_dataframe_random_data):
df.columns = lrange(3)
df.ix[('bar', 'two'), 1] = 7
assert df.loc[('bar', 'two'), 1] == 7

def test_ix_general(self):

# ix general issues

# GH 2817
data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
df = DataFrame(data).set_index(keys=['col', 'year'])
key = 4.0, 2012

# emits a PerformanceWarning, ok
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_frame_equal(df.loc[key], df.iloc[2:])

# this is ok
df.sort_index(inplace=True)
res = df.loc[key]

# col has float dtype, result should be Float64Index
index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
names=['col', 'year'])
expected = DataFrame({'amount': [222, 333, 444]}, index=index)
tm.assert_frame_equal(res, expected)
74 changes: 73 additions & 1 deletion pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import itertools
from warnings import catch_warnings

import numpy as np
import pytest

from pandas import DataFrame, MultiIndex, Series
from pandas import DataFrame, Index, MultiIndex, Series
from pandas.util import testing as tm


Expand Down Expand Up @@ -175,3 +176,74 @@ def test_get_loc_single_level(self, single_level_multiindex):
index=single_level)
for k in single_level.values:
s[k]

def test_loc_getitem_int_slice(self):
# GH 3053
# loc should treat integer slices like label slices

index = MultiIndex.from_tuples([t for t in itertools.product(
[6, 7, 8], ['a', 'b'])])
df = DataFrame(np.random.randn(6, 6), index, index)
result = df.loc[6:8, :]
expected = df
tm.assert_frame_equal(result, expected)

index = MultiIndex.from_tuples([t
for t in itertools.product(
[10, 20, 30], ['a', 'b'])])
df = DataFrame(np.random.randn(6, 6), index, index)
result = df.loc[20:30, :]
expected = df.iloc[2:]
tm.assert_frame_equal(result, expected)

# doc examples
result = df.loc[10, :]
expected = df.iloc[0:2]
expected.index = ['a', 'b']
tm.assert_frame_equal(result, expected)

result = df.loc[:, 10]
# expected = df.ix[:,10] (this fails)
expected = df[10]
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
'indexer_type_1',
(list, tuple, set, slice, np.ndarray, Series, Index))
@pytest.mark.parametrize(
'indexer_type_2',
(list, tuple, set, slice, np.ndarray, Series, Index))
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
# GH #19686
# .loc should work with nested indexers which can be
# any list-like objects (see `pandas.api.types.is_list_like`) or slices

def convert_nested_indexer(indexer_type, keys):
if indexer_type == np.ndarray:
return np.array(keys)
if indexer_type == slice:
return slice(*keys)
return indexer_type(keys)

a = [10, 20, 30]
b = [1, 2, 3]
index = MultiIndex.from_product([a, b])
df = DataFrame(
np.arange(len(index), dtype='int64'),
index=index, columns=['Data'])

keys = ([10, 20], [2, 3])
types = (indexer_type_1, indexer_type_2)

# check indexers with all the combinations of nested objects
# of all the valid types
indexer = tuple(
convert_nested_indexer(indexer_type, k)
for indexer_type, k in zip(types, keys))

result = df.loc[indexer, 'Data']
expected = Series(
[1, 2, 4, 5], name='Data',
index=MultiIndex.from_product(keys))

tm.assert_series_equal(result, expected)
17 changes: 16 additions & 1 deletion pandas/tests/indexing/multiindex/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pandas.errors import PerformanceWarning

import pandas as pd
from pandas import DataFrame, MultiIndex, Series
from pandas import DataFrame, Index, MultiIndex, Series
from pandas.util import testing as tm


Expand Down Expand Up @@ -69,3 +69,18 @@ def test_indexing_over_hashtable_size_cutoff(self):
assert s[("a", 7)] == 7

_index._SIZE_CUTOFF = old_cutoff

def test_multi_nan_indexing(self):

# GH 3588
df = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
'b': ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20]})
result = df.set_index(['a', 'b'], drop=False)
expected = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
'b': ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20]},
index=[Index(['R1', 'R2', np.nan, 'R4'],
name='a'),
Index(['C1', 'C2', 'C3', 'C4'], name='b')])
tm.assert_frame_equal(result, expected)
Loading