Skip to content

TST: parametrize slow test #37339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 22, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 42 additions & 40 deletions pandas/tests/indexing/multiindex/test_indexing_slow.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,46 @@
from pandas import DataFrame, Series
import pandas._testing as tm

m = 50
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't you do this in a function where you set the random seed? Not sure it's a big deal.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't set a random seed in this module. I don't think it makes a real difference.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but this should still be in a function; just create a fixture out of it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, this is fine; I realize it's the only thing in this file.

# Module-level test data shared by every parametrized case below.
# NOTE: built once at import time from the global NumPy RNG (no seed is set
# in this block), so the concrete values differ between runs.

# Number of rows in the base frame.
n = 1000
# The first four columns become the MultiIndex levels (cols[:-1]); the last
# column ("jolia") stays as data.
cols = ["jim", "joe", "jolie", "joline", "jolia"]

# One array of length n per column, in the same order as ``cols``.
vals = [
np.random.randint(0, 10, n),
np.random.choice(list("abcdefghij"), n),
np.random.choice(pd.date_range("20141009", periods=10).tolist(), n),
np.random.choice(list("ZYXWVUTSRQ"), n),
np.random.randn(n),
]
# Transpose the per-column arrays into a list of n row tuples.
vals = list(map(tuple, zip(*vals)))

# bunch of keys for testing
# Each key component is drawn from a pool one value larger than the one used
# for ``vals`` (10, "k", an 11th date, "P"), so some keys can never match a row.
# ``m`` (number of random keys) is defined earlier in the module.
keys = [
np.random.randint(0, 11, m),
np.random.choice(list("abcdefghijk"), m),
np.random.choice(pd.date_range("20141009", periods=11).tolist(), m),
np.random.choice(list("ZYXWVUTSRQP"), m),
]
keys = list(map(tuple, zip(*keys)))
# Also add keys taken from actual rows: every (n // m)-th row of ``vals`` with
# its last element (the "jolia" value) dropped.
keys += list(map(lambda t: t[:-1], vals[:: n // m]))


# covers both unique index and non-unique index
df = DataFrame(vals, columns=cols)
# ``a``: every row appears twice, so the index columns contain duplicates.
a = pd.concat([df, df])
# ``b``: at most one row per combination of the four index columns.
b = df.drop_duplicates(subset=cols[:-1])


@pytest.mark.slow
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_multiindex_get_loc(): # GH7724, GH2646
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
@pytest.mark.parametrize("key", keys)
@pytest.mark.parametrize("frame", [a, b])
def test_multiindex_get_loc(lexsort_depth, key, frame):
# GH7724, GH2646

with warnings.catch_warnings(record=True):

# test indexing into a multi-index before & past the lexsort depth
from numpy.random import choice, randint, randn

cols = ["jim", "joe", "jolie", "joline", "jolia"]

def validate(mi, df, key):
mask = np.ones(len(df)).astype("bool")
Expand Down Expand Up @@ -51,38 +80,11 @@ def validate(mi, df, key):
else: # multi hit
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)

def loop(mi, df, keys):
for key in keys:
validate(mi, df, key)

n, m = 1000, 50

vals = [
randint(0, 10, n),
choice(list("abcdefghij"), n),
choice(pd.date_range("20141009", periods=10).tolist(), n),
choice(list("ZYXWVUTSRQ"), n),
randn(n),
]
vals = list(map(tuple, zip(*vals)))

# bunch of keys for testing
keys = [
randint(0, 11, m),
choice(list("abcdefghijk"), m),
choice(pd.date_range("20141009", periods=11).tolist(), m),
choice(list("ZYXWVUTSRQP"), m),
]
keys = list(map(tuple, zip(*keys)))
keys += list(map(lambda t: t[:-1], vals[:: n // m]))

# covers both unique index and non-unique index
df = DataFrame(vals, columns=cols)
a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])

for frame in a, b:
for i in range(5): # lexsort depth
df = frame.copy() if i == 0 else frame.sort_values(by=cols[:i])
mi = df.set_index(cols[:-1])
assert not mi.index.lexsort_depth < i
loop(mi, df, keys)
if lexsort_depth == 0:
df = frame.copy()
else:
df = frame.sort_values(by=cols[:lexsort_depth])

mi = df.set_index(cols[:-1])
assert not mi.index.lexsort_depth < lexsort_depth
validate(mi, df, key)