Skip to content

Commit 026223a

Browse files
committed
PEP8 & comment
1 parent 8a2ebb7 commit 026223a

File tree

2 files changed

+53
-7
lines changed

2 files changed

+53
-7
lines changed

pandas/core/groupby.py

+44-1
Original file line numberDiff line numberDiff line change
@@ -1727,6 +1727,18 @@ class BaseGrouper(object):
17271727
"""
17281728
This is an internal Grouper class, which actually holds
17291729
the generated groups
1730+
1731+
Parameters
1732+
----------
1733+
axis : the axis to group
1734+
groupings : all the grouping instances to handle in this grouper
1735+
for example, when grouping by a list of groupers, pass that list
1736+
sort : True/False
1737+
whether this grouper will give sorted result or not
1738+
indexer : the indexer created by Grouper
1739+
some groupers (e.g. TimeGrouper) sort their axis, so their
1739+
group_info is also sorted; the indexer is needed to reorder
1741+
17301742
"""
17311743

17321744
def __init__(self, axis, groupings, sort=True, group_keys=True,
@@ -2283,6 +2295,35 @@ def generate_bins_generic(values, binner, closed):
22832295

22842296
class BinGrouper(BaseGrouper):
22852297

2298+
"""
2299+
This is an internal Grouper class, which actually holds
2300+
the generated groups. In contrast with BaseGrouper,
2301+
BinGrouper takes the sorted bins and binlabels to compute group_info
2302+
2303+
Parameters
2304+
----------
2305+
bins : the split indices of binlabels used to group the items of the axis
2306+
binlabels : the label list
2307+
indexer : the indexer created by Grouper
2308+
some groupers (e.g. TimeGrouper) sort their axis, so the
2308+
group_info of BinGrouper is also sorted;
2309+
the indexer can be used to restore the order of the unsorted axis
2311+
2312+
Examples
2313+
--------
2314+
bins is [2, 4, 6, 8, 10]
2315+
binlabels is DatetimeIndex(['2005-01-01', '2005-01-03',
2316+
'2005-01-05', '2005-01-07', '2005-01-09'],
2317+
dtype='datetime64[ns]', freq='2D')
2318+
2319+
then the group_info is
2320+
(array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]), array([0, 1, 2, 3, 4]), 5)
2321+
2322+
i.e. the label of each item in the axis, the index of each label in
2322+
the label list, and the number of groups
2324+
2325+
"""
2326+
22862327
def __init__(self, bins, binlabels, filter_empty=False, mutated=False,
22872328
indexer=None):
22882329
self.bins = _ensure_int64(bins)
@@ -2457,7 +2498,8 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
24572498
self.grouper, self._labels, self._group_index = \
24582499
index._get_grouper_for_level(self.grouper, level)
24592500

2460-
# a passed Grouper like
2501+
# a passed Grouper like: get the grouper directly, in the same way as
2501+
# for a single-grouper groupby, and use the group_info to get labels
24612503
elif isinstance(self.grouper, Grouper):
24622504
# get the new grouper
24632505
_, grouper, _ = self.grouper._get_grouper(self.obj)
@@ -2532,6 +2574,7 @@ def ngroups(self):
25322574

25332575
@cache_readonly
25342576
def indices(self):
2577+
# for the case of grouping by a list of groupers
25352578
if isinstance(self.grouper, BaseGrouper):
25362579
return self.grouper.indices
25372580
else:

pandas/tests/test_resample.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -3275,13 +3275,16 @@ def test_aggregate_with_nat(self):
32753275
# if NaT is included, 'var', 'std', 'mean', 'first','last'
32763276
# and 'nth' don't work yet
32773277

3278+
# Issue: 17530
32783279
def test_scalar_call_versus_list_call(self):
3279-
data_frame = pd.DataFrame({
3280-
'location': ['shanghai', 'beijing', 'shanghai'],
3281-
'time': pd.Series(['2017-08-09 13:32:23', '2017-08-11 23:23:15', '2017-08-11 22:23:15'],
3282-
dtype='datetime64[ns]'),
3283-
'value': [1, 2, 3]
3284-
}).set_index('time')
3280+
data_frame = {
3281+
'location': ['shanghai', 'beijing', 'shanghai'],
3282+
'time': pd.Series(['2017-08-09 13:32:23', '2017-08-11 23:23:15',
3283+
'2017-08-11 22:23:15'],
3284+
dtype='datetime64[ns]'),
3285+
'value': [1, 2, 3]
3286+
}
3287+
data_frame = pd.DataFrame(data_frame).set_index('time')
32853288
grouper = TimeGrouper('D')
32863289
grouped = data_frame.groupby(grouper)
32873290
data1 = grouped.count()

0 commit comments

Comments
 (0)