pandas-dev · jreback · Dec 25, 2018 · Dec 24, 2018 · Dec 24, 2018 · Dec 24, 2018
diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py
@@ -0,0 +1,65 @@
+import numpy as np
+import pytest
+
+from pandas.compat import lrange, lzip, range
+
+from pandas import DataFrame, MultiIndex, Series
+from pandas.core import common as com
+import pandas.util.testing as tm
+
+
+def test_detect_chained_assignment():
+    # Chained in-place fillna on MultiIndex columns must raise; taken from:
+    # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
+    a = [12, 23]
+    b = [123, None]  # the missing value that fillna below targets
+    c = [1234, 2345]
+    d = [12345, 23456]
+    tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'),
+              ('ears', 'right')]
+    events = {('eyes', 'left'): a,
+              ('eyes', 'right'): b,
+              ('ears', 'left'): c,
+              ('ears', 'right'): d}
+    multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
+    zed = DataFrame(events, index=['a', 'b'], columns=multiind)
+
+    with pytest.raises(com.SettingWithCopyError):
+        zed['eyes']['right'].fillna(value=555, inplace=True)
+
+
+def test_cache_updating():
+    # 5216
+    # make sure that we don't try to set a dead (stale) cache
+    a = np.random.rand(10, 3)
+    df = DataFrame(a, columns=['x', 'y', 'z'])
+    tuples = [(i, j) for i in range(5) for j in range(2)]  # 5 x 2 = 10 rows
+    index = MultiIndex.from_tuples(tuples)
+    df.index = index
+
+    # set through chained indexing; the write still reaches df here,
+    # since everything is a view
+    df.loc[0]['z'].iloc[0] = 1.
+    result = df.loc[(0, 0), 'z']
+    assert result == 1
+
+    # correct setting: a single .loc call with the full tuple key
+    df.loc[(0, 0), 'z'] = 2
+    result = df.loc[(0, 0), 'z']
+    assert result == 2
+
+
+def test_indexer_caching():
+    # GH5727
+    # make sure that indexers are in the _internal_names_set
+    n = 1000001
+    arrays = [lrange(n), lrange(n)]
+    index = MultiIndex.from_tuples(lzip(*arrays))
+    s = Series(np.zeros(n), index=index)
+    str(s)  # result unused; only checks that the repr can be built
+
+    # setitem with a boolean mask that selects every element (all zeros)
+    expected = Series(np.ones(n), index=index)
+    s = Series(np.zeros(n), index=index)
+    s[s == 0] = 1
+    tm.assert_series_equal(s, expected)
diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from pandas.compat import lrange, range, u, zip
+from pandas.compat import StringIO, lrange, range, u, zip
 
 import pandas as pd
 from pandas import DataFrame, Index, MultiIndex, Series
@@ -343,3 +343,43 @@ def test_mixed_depth_get(unicode_strings):
     expected = df['routine1', 'result1', '']
     expected = expected.rename(('routine1', 'result1'))
     tm.assert_series_equal(result, expected)
+
+
+def test_mi_access():
+    # Full-key and chained selection on ('main', 'sub') MultiIndex columns.
+    # GH 4145
+    data = """h1 main  h3 sub  h5
0  a    A   1  A1   1
1  b    B   2  B1   2
2  c    B   3  A1   3
3  d    A   4  B2   4
4  e    A   5  B2   5
5  f    B   6  A2   6
+"""
+
+    df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0)
+    df2 = df.set_index(['main', 'sub']).T.sort_index(axis=1)  # keyword axis
+    index = Index(['h1', 'h3', 'h5'])
+    columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])
+    expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T
+
+    result = df2.loc[:, ('A', 'A1')]
+    tm.assert_frame_equal(result, expected)
+
+    result = df2[('A', 'A1')]
+    tm.assert_frame_equal(result, expected)
+
+    # GH 4146, not returning a block manager when selecting a unique index
+    # from a duplicate index
+    # as of 4879, this returns a Series (which is similar to what happens
+    # with a non-unique)
+    expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
+    result = df2['A']['A1']
+    tm.assert_series_equal(result, expected)
+
+    # selecting a non_unique from the 2nd level
+    expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
+                         index=Index(['B2', 'B2'], name='sub'),
+                         columns=['h1', 'h3', 'h5']).T
+    result = df2['A']['B2']
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py
@@ -115,3 +115,30 @@ def test_iloc_integer_locations():
     result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)])
 
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    'data, indexes, values, expected_k', [
+        # positional indexers (0, -1, 1) that are not labels of the first level
+        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
+        # test like code sample 1 in the issue
+        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100],
+            [755, 1066]),
+        # test like code sample 2 in the issue
+        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
+        # test like code sample 3 in the issue
+        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10],
+            [8, 15, 13])
+    ])
+def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
+    # GH17148: in-place `+=` via .iloc on a Series indexed by ('i', 'j')
+    df = DataFrame(data=data, columns=['i', 'j', 'k'])
+    df = df.set_index(['i', 'j'])
+
+    series = df.k.copy()
+    for i, v in zip(indexes, values):
+        series.iloc[i] += v
+
+    df['k'] = expected_k  # reuse df to build expected on the same index
+    expected = df.k
+    tm.assert_series_equal(series, expected)
diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+
+import warnings
+
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas.core.api import DataFrame, MultiIndex, Series
+import pandas.util.testing as tm
+
+
+class TestIndexingSlow(object):
+    # Lookup tests marked slow: randomized .loc checks and a 10**6-row index.
+    @pytest.mark.slow
+    @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
+    def test_multiindex_get_loc(self):  # GH7724, GH2646
+
+        with warnings.catch_warnings(record=True):
+
+            # test indexing into a multi-index before & past the lexsort depth
+            from numpy.random import randint, choice, randn
+            cols = ['jim', 'joe', 'jolie', 'joline', 'jolia']
+
+            def validate(mi, df, key):
+                mask = np.ones(len(df)).astype('bool')
+
+                # test for all partials of this key
+                for i, k in enumerate(key):
+                    mask &= df.iloc[:, i] == k
+
+                    if not mask.any():
+                        assert key[:i + 1] not in mi.index
+                        continue  # mask stays empty for longer prefixes too
+
+                    assert key[:i + 1] in mi.index
+                    right = df[mask].copy()
+
+                    if i + 1 != len(key):  # partial key
+                        right.drop(cols[:i + 1], axis=1, inplace=True)
+                        right.set_index(cols[i + 1:-1], inplace=True)
+                        tm.assert_frame_equal(mi.loc[key[:i + 1]], right)
+
+                    else:  # full key
+                        right.set_index(cols[:-1], inplace=True)
+                        if len(right) == 1:  # single hit
+                            right = Series(right['jolia'].values,
+                                           name=right.index[0],
+                                           index=['jolia'])
+                            tm.assert_series_equal(mi.loc[key[:i + 1]], right)
+                        else:  # multi hit
+                            tm.assert_frame_equal(mi.loc[key[:i + 1]], right)
+
+            def loop(mi, df, keys):
+                for key in keys:
+                    validate(mi, df, key)
+
+            n, m = 1000, 50
+
+            vals = [randint(0, 10, n), choice(
+                list('abcdefghij'), n), choice(
+                    pd.date_range('20141009', periods=10).tolist(), n), choice(
+                        list('ZYXWVUTSRQ'), n), randn(n)]
+            vals = list(map(tuple, zip(*vals)))
+
+            # random keys, some absent; keys sampled from vals are added below
+            keys = [randint(0, 11, m), choice(
+                list('abcdefghijk'), m), choice(
+                    pd.date_range('20141009', periods=11).tolist(), m), choice(
+                        list('ZYXWVUTSRQP'), m)]
+            keys = list(map(tuple, zip(*keys)))
+            keys += list(map(lambda t: t[:-1], vals[::n // m]))
+
+            # covers both unique index and non-unique index
+            df = DataFrame(vals, columns=cols)
+            a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])
+
+            for frame in a, b:
+                for i in range(5):  # lexsort depth
+                    df = frame.copy() if i == 0 else frame.sort_values(
+                        by=cols[:i])
+                    mi = df.set_index(cols[:-1])
+                    assert not mi.index.lexsort_depth < i
+                    loop(mi, df, keys)
+
+    @pytest.mark.slow
+    def test_large_mi_dataframe_indexing(self):
+        # GH10645: a key past the end must not be reported as contained
+        result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)])
+        assert (10 ** 6, 0) not in result
diff --git a/pandas/tests/indexing/multiindex/test_ix.py b/pandas/tests/indexing/multiindex/test_ix.py
@@ -3,6 +3,10 @@
 import pytest
 
 from pandas.compat import lrange
+from pandas.errors import PerformanceWarning
+
+from pandas import DataFrame, MultiIndex
+from pandas.util import testing as tm
 
 
 @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
@@ -25,3 +29,28 @@ def test_frame_setitem_ix(self, multiindex_dataframe_random_data):
             df.columns = lrange(3)
             df.ix[('bar', 'two'), 1] = 7
         assert df.loc[('bar', 'two'), 1] == 7
+
+    def test_ix_general(self):
+
+        # general indexing issues (exercised through .loc here, not .ix)
+
+        # GH 2817
+        data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
+                'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
+                'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
+        df = DataFrame(data).set_index(keys=['col', 'year'])
+        key = 4.0, 2012
+
+        # lookup on the not-yet-sorted index emits a PerformanceWarning, ok
+        with tm.assert_produces_warning(PerformanceWarning):
+            tm.assert_frame_equal(df.loc[key], df.iloc[2:])
+
+        # this is ok: the index is sorted before the lookup
+        df.sort_index(inplace=True)
+        res = df.loc[key]
+
+        # col has float dtype, result should be Float64Index
+        index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
+                                       names=['col', 'year'])
+        expected = DataFrame({'amount': [222, 333, 444]}, index=index)
+        tm.assert_frame_equal(res, expected)
diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py
@@ -1,9 +1,10 @@
+import itertools
 from warnings import catch_warnings
 
 import numpy as np
 import pytest
 
-from pandas import DataFrame, MultiIndex, Series
+from pandas import DataFrame, Index, MultiIndex, Series
 from pandas.util import testing as tm
 
 
@@ -175,3 +176,74 @@ def test_get_loc_single_level(self, single_level_multiindex):
                    index=single_level)
         for k in single_level.values:
             s[k]
+
+    def test_loc_getitem_int_slice(self):
+        # GH 3053
+        # loc should treat integer slices like label slices
+
+        index = MultiIndex.from_tuples([t for t in itertools.product(
+            [6, 7, 8], ['a', 'b'])])
+        df = DataFrame(np.random.randn(6, 6), index, index)
+        result = df.loc[6:8, :]
+        expected = df  # 6:8 spans every first-level label
+        tm.assert_frame_equal(result, expected)
+
+        index = MultiIndex.from_tuples([t
+                                        for t in itertools.product(
+                                            [10, 20, 30], ['a', 'b'])])
+        df = DataFrame(np.random.randn(6, 6), index, index)
+        result = df.loc[20:30, :]
+        expected = df.iloc[2:]  # drops the two rows under label 10
+        tm.assert_frame_equal(result, expected)
+
+        # doc examples
+        result = df.loc[10, :]
+        expected = df.iloc[0:2]
+        expected.index = ['a', 'b']
+        tm.assert_frame_equal(result, expected)
+
+        result = df.loc[:, 10]
+        # note: df.ix[:, 10] fails here, so df[10] is the reference instead
+        expected = df[10]
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        'indexer_type_1',
+        (list, tuple, set, slice, np.ndarray, Series, Index))
+    @pytest.mark.parametrize(
+        'indexer_type_2',
+        (list, tuple, set, slice, np.ndarray, Series, Index))
+    def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
+        # GH #19686
+        # .loc should work with nested indexers which can be
+        # any list-like objects (see `pandas.api.types.is_list_like`) or slices
+
+        def convert_nested_indexer(indexer_type, keys):
+            if indexer_type == np.ndarray:
+                return np.array(keys)
+            if indexer_type == slice:
+                return slice(*keys)  # the two keys become (start, stop)
+            return indexer_type(keys)
+
+        a = [10, 20, 30]
+        b = [1, 2, 3]
+        index = MultiIndex.from_product([a, b])
+        df = DataFrame(
+            np.arange(len(index), dtype='int64'),
+            index=index, columns=['Data'])
+
+        keys = ([10, 20], [2, 3])
+        types = (indexer_type_1, indexer_type_2)
+
+        # check indexers with all the combinations of nested objects
+        # of all the valid types
+        indexer = tuple(
+            convert_nested_indexer(indexer_type, k)
+            for indexer_type, k in zip(types, keys))
+
+        result = df.loc[indexer, 'Data']
+        expected = Series(  # rows (10, 2), (10, 3), (20, 2), (20, 3)
+            [1, 2, 4, 5], name='Data',
+            index=MultiIndex.from_product(keys))
+
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py
@@ -6,7 +6,7 @@
 from pandas.errors import PerformanceWarning
 
 import pandas as pd
-from pandas import DataFrame, MultiIndex, Series
+from pandas import DataFrame, Index, MultiIndex, Series
 from pandas.util import testing as tm
 
 
@@ -69,3 +69,18 @@ def test_indexing_over_hashtable_size_cutoff(self):
         assert s[("a", 7)] == 7
 
         _index._SIZE_CUTOFF = old_cutoff
+
+    def test_multi_nan_indexing(self):
+
+        # GH 3588: set_index keeps a NaN label in the resulting MultiIndex
+        df = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
+                        'b': ["C1", "C2", "C3", "C4"],
+                        "c": [10, 15, np.nan, 20]})
+        result = df.set_index(['a', 'b'], drop=False)
+        expected = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'],
+                              'b': ["C1", "C2", "C3", "C4"],
+                              "c": [10, 15, np.nan, 20]},
+                             index=[Index(['R1', 'R2', np.nan, 'R4'],
+                                          name='a'),
+                                    Index(['C1', 'C2', 'C3', 'C4'], name='b')])
+        tm.assert_frame_equal(result, expected)