PEP8 & comment

ruiann · ruiann · commit 026223a8797a · 2017-09-20T11:00:12.000+08:00
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1727,6 +1727,18 @@ class BaseGrouper(object):
     """
     This is an internal Grouper class, which actually holds
     the generated groups
+
+    Parameters
+    ----------
+    axis : the axis to group
+    groupings : all the grouping instances to handle in this grouper
+        for example, when grouping by a list of groupers, pass that list
+    sort : True/False
+        whether this grouper will give a sorted result or not
+    indexer : the indexer created by Grouper
+        some groupers (e.g. TimeGrouper) sort their axis, so their
+        group_info is also sorted; the indexer is needed to reorder
+
     """
 
     def __init__(self, axis, groupings, sort=True, group_keys=True,
@@ -2283,6 +2295,35 @@ def generate_bins_generic(values, binner, closed):
 
 class BinGrouper(BaseGrouper):
 
+    """
+    This is an internal Grouper class, which actually holds
+    the generated groups. In contrast with BaseGrouper,
+    BinGrouper gets the sorted bins and binlabels to compute group_info
+
+    Parameters
+    ----------
+    bins : the split indices of binlabels to group the items of the axis
+    binlabels : the label list
+    indexer : the indexer created by Grouper
+        some groupers (e.g. TimeGrouper) sort their axis, so the
+        group_info of BinGrouper is also sorted;
+        the indexer can be used to restore the order of the unsorted axis
+
+    Examples
+    --------
+    bins is [2, 4, 6, 8, 10]
+    binlabels is DatetimeIndex(['2005-01-01', '2005-01-03',
+        '2005-01-05', '2005-01-07', '2005-01-09'],
+        dtype='datetime64[ns]', freq='2D')
+
+    then the group_info is
+    (array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]), array([0, 1, 2, 3, 4]), 5)
+
+    which means: the label of each item in the axis, the index of each
+    label in the label list, and the number of groups
+
+    """
+
     def __init__(self, bins, binlabels, filter_empty=False, mutated=False,
                  indexer=None):
         self.bins = _ensure_int64(bins)
@@ -2457,7 +2498,8 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
             self.grouper, self._labels, self._group_index = \
                 index._get_grouper_for_level(self.grouper, level)
 
-        # a passed Grouper like
+        # a passed Grouper-like object: get the grouper directly, in the same
+        # way as a single-grouper groupby, and use group_info to get labels
         elif isinstance(self.grouper, Grouper):
             # get the new grouper
             _, grouper, _ = self.grouper._get_grouper(self.obj)
@@ -2532,6 +2574,7 @@ def ngroups(self):
 
     @cache_readonly
     def indices(self):
+        # for the case of grouping by a list of groupers
         if isinstance(self.grouper, BaseGrouper):
             return self.grouper.indices
         else:
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
@@ -3275,13 +3275,16 @@ def test_aggregate_with_nat(self):
             # if NaT is included, 'var', 'std', 'mean', 'first','last'
             # and 'nth' doesn't work yet
 
+    # Issue: 17530
     def test_scalar_call_versus_list_call(self):
-        data_frame = pd.DataFrame({
-                                      'location': ['shanghai', 'beijing', 'shanghai'],
-                                      'time': pd.Series(['2017-08-09 13:32:23', '2017-08-11 23:23:15', '2017-08-11 22:23:15'],
-                                                  dtype='datetime64[ns]'),
-                                      'value': [1, 2, 3]
-        }).set_index('time')
+        data_frame = {
+            'location': ['shanghai', 'beijing', 'shanghai'],
+            'time': pd.Series(['2017-08-09 13:32:23', '2017-08-11 23:23:15',
+                               '2017-08-11 22:23:15'],
+                              dtype='datetime64[ns]'),
+            'value': [1, 2, 3]
+        }
+        data_frame = pd.DataFrame(data_frame).set_index('time')
         grouper = TimeGrouper('D')
         grouped = data_frame.groupby(grouper)
         data1 = grouped.count()