Skip to content

Commit 90ff22d

Browse files
committed
split up some value_counts groupby tests a bit
1 parent 18ea902 commit 90ff22d

File tree

3 files changed

+60
-54
lines changed

3 files changed

+60
-54
lines changed

pandas/core/groupby.py

-1
Original file line numberDiff line numberDiff line change
@@ -2426,7 +2426,6 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
24262426
a BaseGrouper.
24272427
24282428
"""
2429-
24302429
group_axis = obj._get_axis(axis)
24312430

24322431
# validate that the passed level is compatible with the passed

pandas/tests/groupby/test_groupby.py

-53
Original file line numberDiff line numberDiff line change
@@ -1510,59 +1510,6 @@ def check_nunique(df, keys, as_index=True):
15101510
check_nunique(frame, ['jim'], as_index=False)
15111511
check_nunique(frame, ['jim', 'joe'], as_index=False)
15121512

1513-
def test_series_groupby_value_counts(self):
1514-
from itertools import product
1515-
np.random.seed(1234)
1516-
1517-
def rebuild_index(df):
1518-
arr = list(map(df.index.get_level_values, range(df.index.nlevels)))
1519-
df.index = MultiIndex.from_arrays(arr, names=df.index.names)
1520-
return df
1521-
1522-
def check_value_counts(df, keys, bins):
1523-
for isort, normalize, sort, ascending, dropna \
1524-
in product((False, True), repeat=5):
1525-
1526-
kwargs = dict(normalize=normalize, sort=sort,
1527-
ascending=ascending, dropna=dropna, bins=bins)
1528-
1529-
gr = df.groupby(keys, sort=isort)
1530-
left = gr['3rd'].value_counts(**kwargs)
1531-
1532-
gr = df.groupby(keys, sort=isort)
1533-
right = gr['3rd'].apply(Series.value_counts, **kwargs)
1534-
right.index.names = right.index.names[:-1] + ['3rd']
1535-
1536-
# have to sort on index because of unstable sort on values
1537-
left, right = map(rebuild_index, (left, right)) # xref GH9212
1538-
assert_series_equal(left.sort_index(), right.sort_index())
1539-
1540-
def loop(df):
1541-
bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2)
1542-
keys = '1st', '2nd', ('1st', '2nd')
1543-
for k, b in product(keys, bins):
1544-
check_value_counts(df, k, b)
1545-
1546-
days = date_range('2015-08-24', periods=10)
1547-
1548-
for n, m in product((100, 1000), (5, 20)):
1549-
frame = DataFrame({
1550-
'1st': np.random.choice(
1551-
list('abcd'), n),
1552-
'2nd': np.random.choice(days, n),
1553-
'3rd': np.random.randint(1, m + 1, n)
1554-
})
1555-
1556-
loop(frame)
1557-
1558-
frame.loc[1::11, '1st'] = nan
1559-
frame.loc[3::17, '2nd'] = nan
1560-
frame.loc[7::19, '3rd'] = nan
1561-
frame.loc[8::19, '3rd'] = nan
1562-
frame.loc[9::19, '3rd'] = nan
1563-
1564-
loop(frame)
1565-
15661513
def test_multiindex_passthru(self):
15671514

15681515
# GH 7997
(new file — path not captured in this extract; contains the relocated value_counts groupby test)

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import pytest
2+
3+
from itertools import product
4+
import numpy as np
5+
6+
from pandas.util import testing as tm
7+
from pandas import MultiIndex, DataFrame, Series, date_range
8+
9+
10+
@pytest.mark.parametrize("n,m", product((100, 1000), (5, 20)))
11+
def test_series_groupby_value_counts(n, m):
12+
np.random.seed(1234)
13+
14+
def rebuild_index(df):
15+
arr = list(map(df.index.get_level_values, range(df.index.nlevels)))
16+
df.index = MultiIndex.from_arrays(arr, names=df.index.names)
17+
return df
18+
19+
def check_value_counts(df, keys, bins):
20+
for isort, normalize, sort, ascending, dropna \
21+
in product((False, True), repeat=5):
22+
23+
kwargs = dict(normalize=normalize, sort=sort,
24+
ascending=ascending, dropna=dropna, bins=bins)
25+
26+
gr = df.groupby(keys, sort=isort)
27+
left = gr['3rd'].value_counts(**kwargs)
28+
29+
gr = df.groupby(keys, sort=isort)
30+
right = gr['3rd'].apply(Series.value_counts, **kwargs)
31+
right.index.names = right.index.names[:-1] + ['3rd']
32+
33+
# have to sort on index because of unstable sort on values
34+
left, right = map(rebuild_index, (left, right)) # xref GH9212
35+
tm.assert_series_equal(left.sort_index(), right.sort_index())
36+
37+
def loop(df):
38+
bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2)
39+
keys = '1st', '2nd', ('1st', '2nd')
40+
for k, b in product(keys, bins):
41+
check_value_counts(df, k, b)
42+
43+
days = date_range('2015-08-24', periods=10)
44+
45+
frame = DataFrame({
46+
'1st': np.random.choice(
47+
list('abcd'), n),
48+
'2nd': np.random.choice(days, n),
49+
'3rd': np.random.randint(1, m + 1, n)
50+
})
51+
52+
loop(frame)
53+
54+
frame.loc[1::11, '1st'] = np.nan
55+
frame.loc[3::17, '2nd'] = np.nan
56+
frame.loc[7::19, '3rd'] = np.nan
57+
frame.loc[8::19, '3rd'] = np.nan
58+
frame.loc[9::19, '3rd'] = np.nan
59+
60+
loop(frame)

0 commit comments

Comments (0)