Skip to content

Commit 62b9b28

Browse files
committed
Added tests for multiple groups
1 parent: c607655 · commit: 62b9b28

File tree

1 file changed

+108
-141
lines changed

1 file changed

+108
-141
lines changed

pandas/tests/groupby/test_groupby.py

+108 -141
Original file line number | Diff line number | Diff line change
@@ -1895,61 +1895,49 @@ def test_rank_apply(self):
18951895
expected = expected.reindex(result.index)
18961896
assert_series_equal(result, expected)
18971897

1898+
@pytest.mark.parametrize("grps", [
1899+
['qux'], ['qux', 'quux']])
18981900
@pytest.mark.parametrize("vals", [
18991901
[2, 2, 8, 2, 6], ['bar', 'bar', 'foo', 'bar', 'baz'],
19001902
[pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'),
19011903
pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'),
19021904
pd.Timestamp('2018-01-06')]])
19031905
@pytest.mark.parametrize("ties_method,ascending,pct,exp", [
1904-
('average', True, False, DataFrame(
1905-
[2., 2., 5., 2., 4.], columns=['val'])),
1906-
('average', True, True, DataFrame(
1907-
[0.4, 0.4, 1.0, 0.4, 0.8], columns=['val'])),
1908-
('average', False, False, DataFrame(
1909-
[4., 4., 1., 4., 2.], columns=['val'])),
1910-
('average', False, True, DataFrame(
1911-
[.8, .8, .2, .8, .4], columns=['val'])),
1912-
('min', True, False, DataFrame(
1913-
[1., 1., 5., 1., 4.], columns=['val'])),
1914-
('min', True, True, DataFrame(
1915-
[0.2, 0.2, 1.0, 0.2, 0.8], columns=['val'])),
1916-
('min', False, False, DataFrame(
1917-
[3., 3., 1., 3., 2.], columns=['val'])),
1918-
('min', False, True, DataFrame(
1919-
[.6, .6, .2, .6, .4], columns=['val'])),
1920-
('max', True, False, DataFrame(
1921-
[3., 3., 5., 3., 4.], columns=['val'])),
1922-
('max', True, True, DataFrame(
1923-
[0.6, 0.6, 1.0, 0.6, 0.8], columns=['val'])),
1924-
('max', False, False, DataFrame(
1925-
[5., 5., 1., 5., 2.], columns=['val'])),
1926-
('max', False, True, DataFrame(
1927-
[1., 1., .2, 1., .4], columns=['val'])),
1928-
('first', True, False, DataFrame(
1929-
[1., 2., 5., 3., 4.], columns=['val'])),
1930-
('first', True, True, DataFrame(
1931-
[0.2, 0.4, 1.0, 0.6, 0.8], columns=['val'])),
1932-
('first', False, False, DataFrame(
1933-
[3., 4., 1., 5., 2.], columns=['val'])),
1934-
('first', False, True, DataFrame(
1935-
[.6, .8, .2, 1., .4], columns=['val'])),
1936-
('dense', True, False, DataFrame(
1937-
[1., 1., 3., 1., 2.], columns=['val'])),
1938-
('dense', True, True, DataFrame(
1939-
[0.2, 0.2, 0.6, 0.2, 0.4], columns=['val'])),
1940-
('dense', False, False, DataFrame(
1941-
[3., 3., 1., 3., 2.], columns=['val'])),
1942-
('dense', False, True, DataFrame(
1943-
[.6, .6, .2, .6, .4], columns=['val'])),
1906+
('average', True, False, [2., 2., 5., 2., 4.]),
1907+
('average', True, True, [0.4, 0.4, 1.0, 0.4, 0.8]),
1908+
('average', False, False, [4., 4., 1., 4., 2.]),
1909+
('average', False, True, [.8, .8, .2, .8, .4]),
1910+
('min', True, False, [1., 1., 5., 1., 4.]),
1911+
('min', True, True, [0.2, 0.2, 1.0, 0.2, 0.8]),
1912+
('min', False, False, [3., 3., 1., 3., 2.]),
1913+
('min', False, True, [.6, .6, .2, .6, .4]),
1914+
('max', True, False, [3., 3., 5., 3., 4.]),
1915+
('max', True, True, [0.6, 0.6, 1.0, 0.6, 0.8]),
1916+
('max', False, False, [5., 5., 1., 5., 2.]),
1917+
('max', False, True, [1., 1., .2, 1., .4]),
1918+
('first', True, False, [1., 2., 5., 3., 4.]),
1919+
('first', True, True, [0.2, 0.4, 1.0, 0.6, 0.8]),
1920+
('first', False, False, [3., 4., 1., 5., 2.]),
1921+
('first', False, True, [.6, .8, .2, 1., .4]),
1922+
('dense', True, False, [1., 1., 3., 1., 2.]),
1923+
('dense', True, True, [0.2, 0.2, 0.6, 0.2, 0.4]),
1924+
('dense', False, False, [3., 3., 1., 3., 2.]),
1925+
('dense', False, True, [.6, .6, .2, .6, .4]),
19441926
])
1945-
def test_rank_args(self, vals, ties_method, ascending, pct, exp):
1927+
def test_rank_args(self, grps, vals, ties_method, ascending, pct, exp):
19461928
if ties_method == 'first' and vals[0] == 'bar':
19471929
pytest.xfail("See GH 19482")
1948-
df = DataFrame({'key': ['foo']*5, 'val': vals})
1930+
key = np.repeat(grps, len(vals))
1931+
vals = vals * len(grps)
1932+
df = DataFrame({'key': key, 'val': vals})
19491933
result = df.groupby('key').rank(method=ties_method, ascending=ascending,
19501934
pct=pct)
1951-
assert_frame_equal(result, exp)
19521935

1936+
exp_df = DataFrame(exp * len(grps), columns=['val'])
1937+
assert_frame_equal(result, exp_df)
1938+
1939+
@pytest.mark.parametrize("grps", [
1940+
['qux'], ['qux', 'quux']])
19531941
@pytest.mark.parametrize("vals", [
19541942
[2, 2, np.nan, 8, 2, 6, np.nan, np.nan], # floats
19551943
['bar', 'bar', np.nan, 'foo', 'bar', 'baz', np.nan, np.nan], # objects
@@ -1958,110 +1946,89 @@ def test_rank_args(self, vals, ties_method, ascending, pct, exp):
19581946
pd.Timestamp('2018-01-06'), np.nan, np.nan]
19591947
])
19601948
@pytest.mark.parametrize("ties_method,ascending,na_option,pct,exp", [
1961-
('average', True, 'keep', False, DataFrame(
1962-
[2., 2., np.nan, 5., 2., 4., np.nan, np.nan], columns=['val'])),
1963-
('average', True, 'keep', True, DataFrame(
1964-
[0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan],
1965-
columns=['val'])),
1966-
('average', False, 'keep', False, DataFrame(
1967-
[4., 4., np.nan, 1., 4., 2., np.nan, np.nan], columns=['val'])),
1968-
('average', False, 'keep', True, DataFrame(
1969-
[.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan], columns=['val'])),
1970-
('min', True, 'keep', False, DataFrame(
1971-
[1., 1., np.nan, 5., 1., 4., np.nan, np.nan], columns=['val'])),
1972-
('min', True, 'keep', True, DataFrame(
1973-
[0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan],
1974-
columns=['val'])),
1975-
('min', False, 'keep', False, DataFrame(
1976-
[3., 3., np.nan, 1., 3., 2., np.nan, np.nan], columns=['val'])),
1977-
('min', False, 'keep', True, DataFrame(
1978-
[.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan], columns=['val'])),
1979-
('max', True, 'keep', False, DataFrame(
1980-
[3., 3., np.nan, 5., 3., 4., np.nan, np.nan], columns=['val'])),
1981-
('max', True, 'keep', True, DataFrame(
1982-
[0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan],
1983-
columns=['val'])),
1984-
('max', False, 'keep', False, DataFrame(
1985-
[5., 5., np.nan, 1., 5., 2., np.nan, np.nan], columns=['val'])),
1986-
('max', False, 'keep', True, DataFrame(
1987-
[1., 1., np.nan, 0.2, 1., 0.4, np.nan, np.nan], columns=['val'])),
1988-
('first', True, 'keep', False, DataFrame(
1989-
[1., 2., np.nan, 5., 3., 4., np.nan, np.nan], columns=['val'])),
1990-
('first', True, 'keep', True, DataFrame(
1991-
[0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan],
1992-
columns=['val'])),
1993-
('first', False, 'keep', False, DataFrame(
1994-
[3., 4., np.nan, 1., 5., 2., np.nan, np.nan], columns=['val'])),
1995-
('first', False, 'keep', True, DataFrame(
1996-
[.6, 0.8, np.nan, 0.2, 1., 0.4, np.nan, np.nan], columns=['val'])),
1997-
('dense', True, 'keep', False, DataFrame(
1998-
[1., 1., np.nan, 3., 1., 2., np.nan, np.nan], columns=['val'])),
1999-
('dense', True, 'keep', True, DataFrame(
2000-
[0.2, 0.2, np.nan, 0.6, 0.2, 0.4, np.nan, np.nan],
2001-
columns=['val'])),
2002-
('dense', False, 'keep', False, DataFrame(
2003-
[3., 3., np.nan, 1., 3., 2., np.nan, np.nan], columns=['val'])),
2004-
('dense', False, 'keep', True, DataFrame(
2005-
[.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan], columns=['val'])),
2006-
('average', True, 'no_na', False, DataFrame(
2007-
[2., 2., 7., 5., 2., 4., 7., 7.], columns=['val'])),
2008-
('average', True, 'no_na', True, DataFrame(
2009-
[0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875],
2010-
columns=['val'])),
2011-
('average', False, 'no_na', False, DataFrame(
2012-
[4., 4., 7.0, 1., 4., 2., 7.0, 7.0], columns=['val'])),
2013-
('average', False, 'no_na', True, DataFrame(
2014-
[0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875],
2015-
columns=['val'])),
2016-
('min', True, 'no_na', False, DataFrame(
2017-
[1., 1., 6., 5., 1., 4., 6., 6.], columns=['val'])),
2018-
('min', True, 'no_na', True, DataFrame(
2019-
[0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75],
2020-
columns=['val'])),
2021-
('min', False, 'no_na', False, DataFrame(
2022-
[3., 3., 6., 1., 3., 2., 6., 6.], columns=['val'])),
2023-
('min', False, 'no_na', True, DataFrame(
2024-
[0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75],
2025-
columns=['val'])),
2026-
('max', True, 'no_na', False, DataFrame(
2027-
[3., 3., 8., 5., 3., 4., 8., 8.], columns=['val'])),
2028-
('max', True, 'no_na', True, DataFrame(
2029-
[0.375, 0.375, 1., 0.625, 0.375, 0.5, 1., 1.], columns=['val'])),
2030-
('max', False, 'no_na', False, DataFrame(
2031-
[5., 5., 8., 1., 5., 2., 8., 8.], columns=['val'])),
2032-
('max', False, 'no_na', True, DataFrame(
2033-
[0.625, 0.625, 1., 0.125, 0.625, 0.25, 1., 1.], columns=['val'])),
2034-
('first', True, 'no_na', False, DataFrame(
2035-
[1., 2., 6., 5., 3., 4., 7., 8.], columns=['val'])),
2036-
('first', True, 'no_na', True, DataFrame(
2037-
[0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.],
2038-
columns=['val'])),
2039-
('first', False, 'no_na', False, DataFrame(
2040-
[3., 4., 6., 1., 5., 2., 7., 8.], columns=['val'])),
2041-
('first', False, 'no_na', True, DataFrame(
2042-
[0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.],
2043-
columns=['val'])),
2044-
('dense', True, 'no_na', False, DataFrame(
2045-
[1., 1., 4., 3., 1., 2., 4., 4.], columns=['val'])),
2046-
('dense', True, 'no_na', True, DataFrame(
2047-
[0.125, 0.125, 0.5, 0.375, 0.125, 0.25, 0.5, 0.5],
2048-
columns=['val'])),
2049-
('dense', False, 'no_na', False, DataFrame(
2050-
[3., 3., 4., 1., 3., 2., 4., 4.], columns=['val'])),
2051-
('dense', False, 'no_na', True, DataFrame(
2052-
[0.375, 0.375, 0.5, 0.125, 0.375, 0.25, 0.5, 0.5],
2053-
columns=['val'])),
1949+
('average', True, 'keep', False,
1950+
[2., 2., np.nan, 5., 2., 4., np.nan, np.nan]),
1951+
('average', True, 'keep', True,
1952+
[0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan]),
1953+
('average', False, 'keep', False,
1954+
[4., 4., np.nan, 1., 4., 2., np.nan, np.nan]),
1955+
('average', False, 'keep', True,
1956+
[.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan]),
1957+
('min', True, 'keep', False,
1958+
[1., 1., np.nan, 5., 1., 4., np.nan, np.nan]),
1959+
('min', True, 'keep', True,
1960+
[0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]),
1961+
('min', False, 'keep', False,
1962+
[3., 3., np.nan, 1., 3., 2., np.nan, np.nan]),
1963+
('min', False, 'keep', True,
1964+
[.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]),
1965+
('max', True, 'keep', False,
1966+
[3., 3., np.nan, 5., 3., 4., np.nan, np.nan]),
1967+
('max', True, 'keep', True,
1968+
[0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]),
1969+
('max', False, 'keep', False,
1970+
[5., 5., np.nan, 1., 5., 2., np.nan, np.nan]),
1971+
('max', False, 'keep', True,
1972+
[1., 1., np.nan, 0.2, 1., 0.4, np.nan, np.nan]),
1973+
('first', True, 'keep', False,
1974+
[1., 2., np.nan, 5., 3., 4., np.nan, np.nan]),
1975+
('first', True, 'keep', True,
1976+
[0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]),
1977+
('first', False, 'keep', False,
1978+
[3., 4., np.nan, 1., 5., 2., np.nan, np.nan]),
1979+
('first', False, 'keep', True,
1980+
[.6, 0.8, np.nan, 0.2, 1., 0.4, np.nan, np.nan]),
1981+
('dense', True, 'keep', False,
1982+
[1., 1., np.nan, 3., 1., 2., np.nan, np.nan]),
1983+
('dense', True, 'keep', True,
1984+
[0.2, 0.2, np.nan, 0.6, 0.2, 0.4, np.nan, np.nan]),
1985+
('dense', False, 'keep', False,
1986+
[3., 3., np.nan, 1., 3., 2., np.nan, np.nan]),
1987+
('dense', False, 'keep', True,
1988+
[.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]),
1989+
('average', True, 'no_na', False, [2., 2., 7., 5., 2., 4., 7., 7.]),
1990+
('average', True, 'no_na', True,
1991+
[0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875]),
1992+
('average', False, 'no_na', False, [4., 4., 7., 1., 4., 2., 7., 7.]),
1993+
('average', False, 'no_na', True,
1994+
[0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875]),
1995+
('min', True, 'no_na', False, [1., 1., 6., 5., 1., 4., 6., 6.]),
1996+
('min', True, 'no_na', True,
1997+
[0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75]),
1998+
('min', False, 'no_na', False, [3., 3., 6., 1., 3., 2., 6., 6.]),
1999+
('min', False, 'no_na', True,
2000+
[0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75]),
2001+
('max', True, 'no_na', False, [3., 3., 8., 5., 3., 4., 8., 8.]),
2002+
('max', True, 'no_na', True,
2003+
[0.375, 0.375, 1., 0.625, 0.375, 0.5, 1., 1.]),
2004+
('max', False, 'no_na', False, [5., 5., 8., 1., 5., 2., 8., 8.]),
2005+
('max', False, 'no_na', True,
2006+
[0.625, 0.625, 1., 0.125, 0.625, 0.25, 1., 1.]),
2007+
('first', True, 'no_na', False, [1., 2., 6., 5., 3., 4., 7., 8.]),
2008+
('first', True, 'no_na', True,
2009+
[0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.]),
2010+
('first', False, 'no_na', False, [3., 4., 6., 1., 5., 2., 7., 8.]),
2011+
('first', False, 'no_na', True,
2012+
[0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.]),
2013+
('dense', True, 'no_na', False, [1., 1., 4., 3., 1., 2., 4., 4.]),
2014+
('dense', True, 'no_na', True,
2015+
[0.125, 0.125, 0.5, 0.375, 0.125, 0.25, 0.5, 0.5]),
2016+
('dense', False, 'no_na', False, [3., 3., 4., 1., 3., 2., 4., 4.]),
2017+
('dense', False, 'no_na', True,
2018+
[0.375, 0.375, 0.5, 0.125, 0.375, 0.25, 0.5, 0.5])
20542019
])
2055-
def test_rank_args_missing(self, vals, ties_method, ascending, na_option,
2056-
pct, exp):
2020+
def test_rank_args_missing(self, grps, vals, ties_method, ascending,
2021+
na_option, pct, exp):
20572022
if ties_method == 'first' and vals[0] == 'bar':
20582023
pytest.xfail("See GH 19482")
2059-
2060-
df = DataFrame({'key': ['foo']*8, 'val': vals})
2024+
key = np.repeat(grps, len(vals))
2025+
vals = vals * len(grps)
2026+
df = DataFrame({'key': key, 'val': vals})
20612027
result = df.groupby('key').rank(method=ties_method, ascending=ascending,
20622028
na_option=na_option, pct=pct)
20632029

2064-
assert_frame_equal(result, exp)
2030+
exp_df = DataFrame(exp * len(grps), columns=['val'])
2031+
assert_frame_equal(result, exp_df)
20652032

20662033
def test_dont_clobber_name_column(self):
20672034
df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'],

0 commit comments

Comments
 (0)