From 9264f104e990d3dbf50910a1b16372ec0103295b Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 18 Oct 2020 15:13:37 -0700 Subject: [PATCH 1/2] TST: tests for #21168, #27420, #15928, #30053 --- pandas/tests/indexes/multi/test_indexing.py | 18 +++++++- pandas/tests/indexing/multiindex/test_loc.py | 48 ++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 6b27682ed5674..cf494b2ce87cc 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.errors import InvalidIndexError +from pandas.errors import InvalidIndexError, PerformanceWarning import pandas as pd from pandas import Categorical, Index, MultiIndex, date_range @@ -646,6 +646,22 @@ def test_get_loc_duplicates2(self): assert index.get_loc("D") == slice(0, 3) + def test_get_loc_past_lexsort_depth(self): + # GH#30053 + idx = MultiIndex( + levels=[["a"], [0, 7], [1]], + codes=[[0, 0], [1, 0], [0, 0]], + names=["x", "y", "z"], + sortorder=0, + ) + key = ("a", 7) + + with tm.assert_produces_warning(PerformanceWarning): + # PerformanceWarning: indexing past lexsort depth may impact performance + result = idx.get_loc(key) + + assert result == slice(0, 1, None) + class TestWhere: def test_where(self): diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 1b659bec0e9e8..ea6795ba513d5 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -1,3 +1,5 @@ +import os + import numpy as np import pytest @@ -522,3 +524,49 @@ def test_loc_with_mi_indexer(): columns=["author", "price"], ) tm.assert_frame_equal(result, expected) + + +def test_getitem_str_slice(): + # GH#15928 + path = os.path.join( + pd.__path__[0], "tests", "reshape", "merge", "data", "quotes2.csv" + ) + df = pd.read_csv(path, parse_dates=["time"]) + df2 = df.set_index(["ticker", "time"]).sort_index() + + res = df2.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0) + expected = df2.loc["AAPL"].loc[slice("2016-05-25 13:30:00"), :] + tm.assert_frame_equal(res, expected) + + +def test_3levels_leading_period_index(): + # GH#24091 + pi = pd.PeriodIndex( + ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"], + name="datetime", + freq="B", + ) + lev2 = ["A", "A", "Z", "W"] + lev3 = ["B", "C", "Q", "F"] + mi = pd.MultiIndex.from_arrays([pi, lev2, lev3]) + + ser = pd.Series(range(4), index=mi, dtype=np.float64) + result = ser.loc[(pi[0], "A", "B")] + assert result == 0.0 + + +class TestKeyErrorsWithMultiIndex: + def test_missing_keys_raises_keyerror(self): + # GH#27420 KeyError, not TypeError + df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"]) + df2 = df.set_index(["A", "B"]) + + with pytest.raises(KeyError, match="1"): + df2.loc[(1, 6)] + + def test_missing_key_raises_keyerror2(self): + # GH#21168 KeyError, not "IndexingError: Too many indexers" + ser = pd.Series(-1, index=pd.MultiIndex.from_product([[0, 1]] * 2)) + + with pytest.raises(KeyError, match=r"\(0, 3\)"): + ser.loc[0, 3] From 87738ff7a260a8b0202af43f3f9bb2db776921dc Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 19 Oct 2020 16:52:26 -0700 Subject: [PATCH 2/2] use datapath --- pandas/tests/indexing/multiindex/test_loc.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index ea6795ba513d5..518ec9e997183 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -1,5 +1,3 @@ -import os - import numpy as np import pytest @@ -526,11 +524,9 @@ def test_loc_with_mi_indexer(): tm.assert_frame_equal(result, expected) -def test_getitem_str_slice(): +def test_getitem_str_slice(datapath): # GH#15928 - path = os.path.join( - pd.__path__[0], "tests", "reshape", "merge", "data", "quotes2.csv" - ) + path = datapath("reshape", "merge", "data", "quotes2.csv") df = pd.read_csv(path, parse_dates=["time"]) df2 = df.set_index(["ticker", "time"]).sort_index()