Skip to content

Commit a6c1f6c

Browse files
authored
TST: parameterize and split (#45183)
1 parent aa88988 commit a6c1f6c

File tree

2 files changed

+156
-79
lines changed

2 files changed

+156
-79
lines changed

pandas/tests/frame/methods/test_reindex.py

+125-56
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
timedelta,
44
)
55
import inspect
6-
from itertools import permutations
76

87
import numpy as np
98
import pytest
@@ -339,23 +338,24 @@ def test_reindex_limit(self):
339338
expected = DataFrame(exp_data)
340339
tm.assert_frame_equal(result, expected)
341340

342-
def test_reindex_level(self):
343-
icol = ["jim", "joe", "jolie"]
344-
345-
def verify_first_level(df, level, idx, check_index_type=True):
346-
def f(val):
347-
return np.nonzero((df[level] == val).to_numpy())[0]
348-
349-
i = np.concatenate(list(map(f, idx)))
350-
left = df.set_index(icol).reindex(idx, level=level)
351-
right = df.iloc[i].set_index(icol)
352-
tm.assert_frame_equal(left, right, check_index_type=check_index_type)
353-
354-
def verify(df, level, idx, indexer, check_index_type=True):
355-
left = df.set_index(icol).reindex(idx, level=level)
356-
right = df.iloc[indexer].set_index(icol)
357-
tm.assert_frame_equal(left, right, check_index_type=check_index_type)
358-
341+
@pytest.mark.parametrize(
342+
"idx, check_index_type",
343+
[
344+
[["C", "B", "A"], True],
345+
[["F", "C", "A", "D"], True],
346+
[["A"], True],
347+
[["A", "B", "C"], True],
348+
[["C", "A", "B"], True],
349+
[["C", "B"], True],
350+
[["C", "A"], True],
351+
[["A", "B"], True],
352+
[["B", "A", "C"], True],
353+
# reindex by these causes different MultiIndex levels
354+
[["D", "F"], False],
355+
[["A", "C", "B"], False],
356+
],
357+
)
358+
def test_reindex_level_verify_first_level(self, idx, check_index_type):
359359
df = DataFrame(
360360
{
361361
"jim": list("B" * 4 + "A" * 2 + "C" * 3),
@@ -364,35 +364,40 @@ def verify(df, level, idx, indexer, check_index_type=True):
364364
"joline": np.random.randint(0, 1000, 9),
365365
}
366366
)
367+
icol = ["jim", "joe", "jolie"]
367368

368-
target = [
369-
["C", "B", "A"],
370-
["F", "C", "A", "D"],
371-
["A"],
372-
["A", "B", "C"],
373-
["C", "A", "B"],
374-
["C", "B"],
375-
["C", "A"],
376-
["A", "B"],
377-
["B", "A", "C"],
378-
]
379-
380-
for idx in target:
381-
verify_first_level(df, "jim", idx)
382-
383-
# reindex by these causes different MultiIndex levels
384-
for idx in [["D", "F"], ["A", "C", "B"]]:
385-
verify_first_level(df, "jim", idx, check_index_type=False)
369+
def f(val):
370+
return np.nonzero((df["jim"] == val).to_numpy())[0]
386371

387-
verify(df, "joe", list("abcde"), [3, 2, 1, 0, 5, 4, 8, 7, 6])
388-
verify(df, "joe", list("abcd"), [3, 2, 1, 0, 5, 8, 7, 6])
389-
verify(df, "joe", list("abc"), [3, 2, 1, 8, 7, 6])
390-
verify(df, "joe", list("eca"), [1, 3, 4, 6, 8])
391-
verify(df, "joe", list("edc"), [0, 1, 4, 5, 6])
392-
verify(df, "joe", list("eadbc"), [3, 0, 2, 1, 4, 5, 8, 7, 6])
393-
verify(df, "joe", list("edwq"), [0, 4, 5])
394-
verify(df, "joe", list("wq"), [], check_index_type=False)
372+
i = np.concatenate(list(map(f, idx)))
373+
left = df.set_index(icol).reindex(idx, level="jim")
374+
right = df.iloc[i].set_index(icol)
375+
tm.assert_frame_equal(left, right, check_index_type=check_index_type)
395376

377+
@pytest.mark.parametrize(
378+
"idx",
379+
[
380+
("mid",),
381+
("mid", "btm"),
382+
("mid", "btm", "top"),
383+
("mid",),
384+
("mid", "top"),
385+
("mid", "top", "btm"),
386+
("btm",),
387+
("btm", "mid"),
388+
("btm", "mid", "top"),
389+
("btm",),
390+
("btm", "top"),
391+
("btm", "top", "mid"),
392+
("top",),
393+
("top", "mid"),
394+
("top", "mid", "btm"),
395+
("top",),
396+
("top", "btm"),
397+
("top", "btm", "mid"),
398+
],
399+
)
400+
def test_reindex_level_verify_first_level_repeats(self, idx):
396401
df = DataFrame(
397402
{
398403
"jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7,
@@ -416,22 +421,86 @@ def verify(df, level, idx, indexer, check_index_type=True):
416421
"joline": np.random.randn(20).round(3) * 10,
417422
}
418423
)
424+
icol = ["jim", "joe", "jolie"]
419425

420-
for idx in permutations(df["jim"].unique()):
421-
for i in range(3):
422-
verify_first_level(df, "jim", idx[: i + 1])
423-
424-
i = [2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10, 11, 12, 13, 14, 18, 19, 15, 16, 17]
425-
verify(df, "joe", ["1st", "2nd", "3rd"], i)
426+
def f(val):
427+
return np.nonzero((df["jim"] == val).to_numpy())[0]
426428

427-
i = [0, 1, 2, 3, 4, 10, 11, 12, 5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 13, 14]
428-
verify(df, "joe", ["3rd", "2nd", "1st"], i)
429+
i = np.concatenate(list(map(f, idx)))
430+
left = df.set_index(icol).reindex(idx, level="jim")
431+
right = df.iloc[i].set_index(icol)
432+
tm.assert_frame_equal(left, right)
429433

430-
i = [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17]
431-
verify(df, "joe", ["2nd", "3rd"], i)
434+
@pytest.mark.parametrize(
435+
"idx, indexer",
436+
[
437+
[
438+
["1st", "2nd", "3rd"],
439+
[2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10, 11, 12, 13, 14, 18, 19, 15, 16, 17],
440+
],
441+
[
442+
["3rd", "2nd", "1st"],
443+
[0, 1, 2, 3, 4, 10, 11, 12, 5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 13, 14],
444+
],
445+
[["2nd", "3rd"], [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17]],
446+
[["3rd", "1st"], [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14]],
447+
],
448+
)
449+
def test_reindex_level_verify_repeats(self, idx, indexer):
450+
df = DataFrame(
451+
{
452+
"jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7,
453+
"joe": ["3rd"] * 2
454+
+ ["1st"] * 3
455+
+ ["2nd"] * 3
456+
+ ["1st"] * 2
457+
+ ["3rd"] * 3
458+
+ ["1st"] * 2
459+
+ ["3rd"] * 3
460+
+ ["2nd"] * 2,
461+
# this needs to be jointly unique with jim and joe or
462+
# reindexing will fail ~1.5% of the time, this works
463+
# out to needing unique groups of same size as joe
464+
"jolie": np.concatenate(
465+
[
466+
np.random.choice(1000, x, replace=False)
467+
for x in [2, 3, 3, 2, 3, 2, 3, 2]
468+
]
469+
),
470+
"joline": np.random.randn(20).round(3) * 10,
471+
}
472+
)
473+
icol = ["jim", "joe", "jolie"]
474+
left = df.set_index(icol).reindex(idx, level="joe")
475+
right = df.iloc[indexer].set_index(icol)
476+
tm.assert_frame_equal(left, right)
432477

433-
i = [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14]
434-
verify(df, "joe", ["3rd", "1st"], i)
478+
@pytest.mark.parametrize(
479+
"idx, indexer, check_index_type",
480+
[
481+
[list("abcde"), [3, 2, 1, 0, 5, 4, 8, 7, 6], True],
482+
[list("abcd"), [3, 2, 1, 0, 5, 8, 7, 6], True],
483+
[list("abc"), [3, 2, 1, 8, 7, 6], True],
484+
[list("eca"), [1, 3, 4, 6, 8], True],
485+
[list("edc"), [0, 1, 4, 5, 6], True],
486+
[list("eadbc"), [3, 0, 2, 1, 4, 5, 8, 7, 6], True],
487+
[list("edwq"), [0, 4, 5], True],
488+
[list("wq"), [], False],
489+
],
490+
)
491+
def test_reindex_level_verify(self, idx, indexer, check_index_type):
492+
df = DataFrame(
493+
{
494+
"jim": list("B" * 4 + "A" * 2 + "C" * 3),
495+
"joe": list("abcdeabcd")[::-1],
496+
"jolie": [10, 20, 30] * 3,
497+
"joline": np.random.randint(0, 1000, 9),
498+
}
499+
)
500+
icol = ["jim", "joe", "jolie"]
501+
left = df.set_index(icol).reindex(idx, level="joe")
502+
right = df.iloc[indexer].set_index(icol)
503+
tm.assert_frame_equal(left, right, check_index_type=check_index_type)
435504

436505
def test_non_monotonic_reindex_methods(self):
437506
dr = date_range("2013-08-01", periods=6, freq="B")

pandas/tests/frame/test_stack_unstack.py

+31-23
Original file line number | Diff line number | Diff line change
@@ -751,21 +751,14 @@ def test_unstack_multi_level_rows_and_cols(self):
751751
expected = df.unstack(["i3"]).unstack(["i2"])
752752
tm.assert_frame_equal(result, expected)
753753

754-
def test_unstack_nan_index1(self):
754+
@pytest.mark.parametrize("idx", [("jim", "joe"), ("joe", "jim")])
755+
@pytest.mark.parametrize("lev", list(range(2)))
756+
def test_unstack_nan_index1(self, idx, lev):
755757
# GH7466
756758
def cast(val):
757759
val_str = "" if val != val else val
758760
return f"{val_str:1}"
759761

760-
def verify(df):
761-
mk_list = lambda a: list(a) if isinstance(a, tuple) else [a]
762-
rows, cols = df.notna().values.nonzero()
763-
for i, j in zip(rows, cols):
764-
left = sorted(df.iloc[i, j].split("."))
765-
right = mk_list(df.index[i]) + mk_list(df.columns[j])
766-
right = sorted(map(cast, right))
767-
assert left == right
768-
769762
df = DataFrame(
770763
{
771764
"jim": ["a", "b", np.nan, "d"],
@@ -778,12 +771,24 @@ def verify(df):
778771
right = df.set_index(["joe", "jim"]).unstack()["jolie"].T
779772
tm.assert_frame_equal(left, right)
780773

781-
for idx in itertools.permutations(df.columns[:2]):
782-
mi = df.set_index(list(idx))
783-
for lev in range(2):
784-
udf = mi.unstack(level=lev)
785-
assert udf.notna().values.sum() == len(df)
786-
verify(udf["jolie"])
774+
mi = df.set_index(list(idx))
775+
udf = mi.unstack(level=lev)
776+
assert udf.notna().values.sum() == len(df)
777+
mk_list = lambda a: list(a) if isinstance(a, tuple) else [a]
778+
rows, cols = udf["jolie"].notna().values.nonzero()
779+
for i, j in zip(rows, cols):
780+
left = sorted(udf["jolie"].iloc[i, j].split("."))
781+
right = mk_list(udf["jolie"].index[i]) + mk_list(udf["jolie"].columns[j])
782+
right = sorted(map(cast, right))
783+
assert left == right
784+
785+
@pytest.mark.parametrize("idx", itertools.permutations(["1st", "2nd", "3rd"]))
786+
@pytest.mark.parametrize("lev", list(range(3)))
787+
@pytest.mark.parametrize("col", ["4th", "5th"])
788+
def test_unstack_nan_index_repeats(self, idx, lev, col):
789+
def cast(val):
790+
val_str = "" if val != val else val
791+
return f"{val_str:1}"
787792

788793
df = DataFrame(
789794
{
@@ -830,13 +835,16 @@ def verify(df):
830835
df.apply(lambda r: ".".join(map(cast, r.iloc[::-1])), axis=1),
831836
)
832837

833-
for idx in itertools.permutations(["1st", "2nd", "3rd"]):
834-
mi = df.set_index(list(idx))
835-
for lev in range(3):
836-
udf = mi.unstack(level=lev)
837-
assert udf.notna().values.sum() == 2 * len(df)
838-
for col in ["4th", "5th"]:
839-
verify(udf[col])
838+
mi = df.set_index(list(idx))
839+
udf = mi.unstack(level=lev)
840+
assert udf.notna().values.sum() == 2 * len(df)
841+
mk_list = lambda a: list(a) if isinstance(a, tuple) else [a]
842+
rows, cols = udf[col].notna().values.nonzero()
843+
for i, j in zip(rows, cols):
844+
left = sorted(udf[col].iloc[i, j].split("."))
845+
right = mk_list(udf[col].index[i]) + mk_list(udf[col].columns[j])
846+
right = sorted(map(cast, right))
847+
assert left == right
840848

841849
def test_unstack_nan_index2(self):
842850
# GH7403

0 commit comments

Comments
 (0)