Skip to content

TST: parameterize and split #45183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 125 additions & 56 deletions pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
timedelta,
)
import inspect
from itertools import permutations

import numpy as np
import pytest
Expand Down Expand Up @@ -302,23 +301,24 @@ def test_reindex_limit(self):
expected = DataFrame(exp_data)
tm.assert_frame_equal(result, expected)

def test_reindex_level(self):
icol = ["jim", "joe", "jolie"]

def verify_first_level(df, level, idx, check_index_type=True):
def f(val):
return np.nonzero((df[level] == val).to_numpy())[0]

i = np.concatenate(list(map(f, idx)))
left = df.set_index(icol).reindex(idx, level=level)
right = df.iloc[i].set_index(icol)
tm.assert_frame_equal(left, right, check_index_type=check_index_type)

def verify(df, level, idx, indexer, check_index_type=True):
left = df.set_index(icol).reindex(idx, level=level)
right = df.iloc[indexer].set_index(icol)
tm.assert_frame_equal(left, right, check_index_type=check_index_type)

@pytest.mark.parametrize(
"idx, check_index_type",
[
[["C", "B", "A"], True],
[["F", "C", "A", "D"], True],
[["A"], True],
[["A", "B", "C"], True],
[["C", "A", "B"], True],
[["C", "B"], True],
[["C", "A"], True],
[["A", "B"], True],
[["B", "A", "C"], True],
# reindex by these causes different MultiIndex levels
[["D", "F"], False],
[["A", "C", "B"], False],
],
)
def test_reindex_level_verify_first_level(self, idx, check_index_type):
df = DataFrame(
{
"jim": list("B" * 4 + "A" * 2 + "C" * 3),
Expand All @@ -327,35 +327,40 @@ def verify(df, level, idx, indexer, check_index_type=True):
"joline": np.random.randint(0, 1000, 9),
}
)
icol = ["jim", "joe", "jolie"]

target = [
["C", "B", "A"],
["F", "C", "A", "D"],
["A"],
["A", "B", "C"],
["C", "A", "B"],
["C", "B"],
["C", "A"],
["A", "B"],
["B", "A", "C"],
]

for idx in target:
verify_first_level(df, "jim", idx)

# reindex by these causes different MultiIndex levels
for idx in [["D", "F"], ["A", "C", "B"]]:
verify_first_level(df, "jim", idx, check_index_type=False)
def f(val):
return np.nonzero((df["jim"] == val).to_numpy())[0]

verify(df, "joe", list("abcde"), [3, 2, 1, 0, 5, 4, 8, 7, 6])
verify(df, "joe", list("abcd"), [3, 2, 1, 0, 5, 8, 7, 6])
verify(df, "joe", list("abc"), [3, 2, 1, 8, 7, 6])
verify(df, "joe", list("eca"), [1, 3, 4, 6, 8])
verify(df, "joe", list("edc"), [0, 1, 4, 5, 6])
verify(df, "joe", list("eadbc"), [3, 0, 2, 1, 4, 5, 8, 7, 6])
verify(df, "joe", list("edwq"), [0, 4, 5])
verify(df, "joe", list("wq"), [], check_index_type=False)
i = np.concatenate(list(map(f, idx)))
left = df.set_index(icol).reindex(idx, level="jim")
right = df.iloc[i].set_index(icol)
tm.assert_frame_equal(left, right, check_index_type=check_index_type)

@pytest.mark.parametrize(
"idx",
[
("mid",),
("mid", "btm"),
("mid", "btm", "top"),
("mid",),
("mid", "top"),
("mid", "top", "btm"),
("btm",),
("btm", "mid"),
("btm", "mid", "top"),
("btm",),
("btm", "top"),
("btm", "top", "mid"),
("top",),
("top", "mid"),
("top", "mid", "btm"),
("top",),
("top", "btm"),
("top", "btm", "mid"),
],
)
def test_reindex_level_verify_first_level_repeats(self, idx):
df = DataFrame(
{
"jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7,
Expand All @@ -379,22 +384,86 @@ def verify(df, level, idx, indexer, check_index_type=True):
"joline": np.random.randn(20).round(3) * 10,
}
)
icol = ["jim", "joe", "jolie"]

for idx in permutations(df["jim"].unique()):
for i in range(3):
verify_first_level(df, "jim", idx[: i + 1])

i = [2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10, 11, 12, 13, 14, 18, 19, 15, 16, 17]
verify(df, "joe", ["1st", "2nd", "3rd"], i)
def f(val):
return np.nonzero((df["jim"] == val).to_numpy())[0]

i = [0, 1, 2, 3, 4, 10, 11, 12, 5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 13, 14]
verify(df, "joe", ["3rd", "2nd", "1st"], i)
i = np.concatenate(list(map(f, idx)))
left = df.set_index(icol).reindex(idx, level="jim")
right = df.iloc[i].set_index(icol)
tm.assert_frame_equal(left, right)

i = [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17]
verify(df, "joe", ["2nd", "3rd"], i)
@pytest.mark.parametrize(
"idx, indexer",
[
[
["1st", "2nd", "3rd"],
[2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10, 11, 12, 13, 14, 18, 19, 15, 16, 17],
],
[
["3rd", "2nd", "1st"],
[0, 1, 2, 3, 4, 10, 11, 12, 5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 13, 14],
],
[["2nd", "3rd"], [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17]],
[["3rd", "1st"], [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14]],
],
)
def test_reindex_level_verify_repeats(self, idx, indexer):
df = DataFrame(
{
"jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7,
"joe": ["3rd"] * 2
+ ["1st"] * 3
+ ["2nd"] * 3
+ ["1st"] * 2
+ ["3rd"] * 3
+ ["1st"] * 2
+ ["3rd"] * 3
+ ["2nd"] * 2,
# this needs to be jointly unique with jim and joe or
# reindexing will fail ~1.5% of the time, this works
# out to needing unique groups of same size as joe
"jolie": np.concatenate(
[
np.random.choice(1000, x, replace=False)
for x in [2, 3, 3, 2, 3, 2, 3, 2]
]
),
"joline": np.random.randn(20).round(3) * 10,
}
)
icol = ["jim", "joe", "jolie"]
left = df.set_index(icol).reindex(idx, level="joe")
right = df.iloc[indexer].set_index(icol)
tm.assert_frame_equal(left, right)

i = [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14]
verify(df, "joe", ["3rd", "1st"], i)
@pytest.mark.parametrize(
"idx, indexer, check_index_type",
[
[list("abcde"), [3, 2, 1, 0, 5, 4, 8, 7, 6], True],
[list("abcd"), [3, 2, 1, 0, 5, 8, 7, 6], True],
[list("abc"), [3, 2, 1, 8, 7, 6], True],
[list("eca"), [1, 3, 4, 6, 8], True],
[list("edc"), [0, 1, 4, 5, 6], True],
[list("eadbc"), [3, 0, 2, 1, 4, 5, 8, 7, 6], True],
[list("edwq"), [0, 4, 5], True],
[list("wq"), [], False],
],
)
def test_reindex_level_verify(self, idx, indexer, check_index_type):
df = DataFrame(
{
"jim": list("B" * 4 + "A" * 2 + "C" * 3),
"joe": list("abcdeabcd")[::-1],
"jolie": [10, 20, 30] * 3,
"joline": np.random.randint(0, 1000, 9),
}
)
icol = ["jim", "joe", "jolie"]
left = df.set_index(icol).reindex(idx, level="joe")
right = df.iloc[indexer].set_index(icol)
tm.assert_frame_equal(left, right, check_index_type=check_index_type)

def test_non_monotonic_reindex_methods(self):
dr = date_range("2013-08-01", periods=6, freq="B")
Expand Down
54 changes: 31 additions & 23 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,21 +751,14 @@ def test_unstack_multi_level_rows_and_cols(self):
expected = df.unstack(["i3"]).unstack(["i2"])
tm.assert_frame_equal(result, expected)

def test_unstack_nan_index1(self):
@pytest.mark.parametrize("idx", [("jim", "joe"), ("joe", "jim")])
@pytest.mark.parametrize("lev", list(range(2)))
def test_unstack_nan_index1(self, idx, lev):
# GH7466
def cast(val):
val_str = "" if val != val else val
return f"{val_str:1}"

def verify(df):
mk_list = lambda a: list(a) if isinstance(a, tuple) else [a]
rows, cols = df.notna().values.nonzero()
for i, j in zip(rows, cols):
left = sorted(df.iloc[i, j].split("."))
right = mk_list(df.index[i]) + mk_list(df.columns[j])
right = sorted(map(cast, right))
assert left == right

df = DataFrame(
{
"jim": ["a", "b", np.nan, "d"],
Expand All @@ -778,12 +771,24 @@ def verify(df):
right = df.set_index(["joe", "jim"]).unstack()["jolie"].T
tm.assert_frame_equal(left, right)

for idx in itertools.permutations(df.columns[:2]):
mi = df.set_index(list(idx))
for lev in range(2):
udf = mi.unstack(level=lev)
assert udf.notna().values.sum() == len(df)
verify(udf["jolie"])
mi = df.set_index(list(idx))
udf = mi.unstack(level=lev)
assert udf.notna().values.sum() == len(df)
mk_list = lambda a: list(a) if isinstance(a, tuple) else [a]
rows, cols = udf["jolie"].notna().values.nonzero()
for i, j in zip(rows, cols):
left = sorted(udf["jolie"].iloc[i, j].split("."))
right = mk_list(udf["jolie"].index[i]) + mk_list(udf["jolie"].columns[j])
right = sorted(map(cast, right))
assert left == right

@pytest.mark.parametrize("idx", itertools.permutations(["1st", "2nd", "3rd"]))
@pytest.mark.parametrize("lev", list(range(3)))
@pytest.mark.parametrize("col", ["4th", "5th"])
def test_unstack_nan_index_repeats(self, idx, lev, col):
def cast(val):
val_str = "" if val != val else val
return f"{val_str:1}"

df = DataFrame(
{
Expand Down Expand Up @@ -830,13 +835,16 @@ def verify(df):
df.apply(lambda r: ".".join(map(cast, r.iloc[::-1])), axis=1),
)

for idx in itertools.permutations(["1st", "2nd", "3rd"]):
mi = df.set_index(list(idx))
for lev in range(3):
udf = mi.unstack(level=lev)
assert udf.notna().values.sum() == 2 * len(df)
for col in ["4th", "5th"]:
verify(udf[col])
mi = df.set_index(list(idx))
udf = mi.unstack(level=lev)
assert udf.notna().values.sum() == 2 * len(df)
mk_list = lambda a: list(a) if isinstance(a, tuple) else [a]
rows, cols = udf[col].notna().values.nonzero()
for i, j in zip(rows, cols):
left = sorted(udf[col].iloc[i, j].split("."))
right = mk_list(udf[col].index[i]) + mk_list(udf[col].columns[j])
right = sorted(map(cast, right))
assert left == right

def test_unstack_nan_index2(self):
# GH7403
Expand Down