From 33a89adff6de821e6de87f48c90efca7693a7ed9 Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Sat, 3 Oct 2020 13:11:42 +0200 Subject: [PATCH 1/2] blacken groupby.rst --- doc/source/user_guide/groupby.rst | 463 ++++++++++++++++-------------- 1 file changed, 254 insertions(+), 209 deletions(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 52342de98de79..9696f14f03b56 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -68,19 +68,23 @@ object (more on what the GroupBy object is later), you may do the following: .. ipython:: python - df = pd.DataFrame([('bird', 'Falconiformes', 389.0), - ('bird', 'Psittaciformes', 24.0), - ('mammal', 'Carnivora', 80.2), - ('mammal', 'Primates', np.nan), - ('mammal', 'Carnivora', 58)], - index=['falcon', 'parrot', 'lion', 'monkey', 'leopard'], - columns=('class', 'order', 'max_speed')) + df = pd.DataFrame( + [ + ("bird", "Falconiformes", 389.0), + ("bird", "Psittaciformes", 24.0), + ("mammal", "Carnivora", 80.2), + ("mammal", "Primates", np.nan), + ("mammal", "Carnivora", 58), + ], + index=["falcon", "parrot", "lion", "monkey", "leopard"], + columns=("class", "order", "max_speed"), + ) df # default is axis=0 - grouped = df.groupby('class') - grouped = df.groupby('order', axis='columns') - grouped = df.groupby(['class', 'order']) + grouped = df.groupby("class") + grouped = df.groupby("order", axis="columns") + grouped = df.groupby(["class", "order"]) The mapping can be specified many different ways: @@ -103,12 +107,14 @@ consider the following ``DataFrame``: .. ipython:: python - df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) + df = pd.DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) df On a DataFrame, we obtain a GroupBy object by calling :meth:`~DataFrame.groupby`. @@ -116,8 +122,8 @@ We could naturally group by either the ``A`` or ``B`` columns, or both: .. ipython:: python - grouped = df.groupby('A') - grouped = df.groupby(['A', 'B']) + grouped = df.groupby("A") + grouped = df.groupby(["A", "B"]) .. versionadded:: 0.24 @@ -126,8 +132,8 @@ but the specified columns .. ipython:: python - df2 = df.set_index(['A', 'B']) - grouped = df2.groupby(level=df2.index.names.difference(['B'])) + df2 = df.set_index(["A", "B"]) + grouped = df2.groupby(level=df2.index.names.difference(["B"])) grouped.sum() These will split the DataFrame on its index (rows). We could also split by the @@ -181,9 +187,9 @@ By default the group keys are sorted during the ``groupby`` operation. You may h .. ipython:: python - df2 = pd.DataFrame({'X': ['B', 'B', 'A', 'A'], 'Y': [1, 2, 3, 4]}) - df2.groupby(['X']).sum() - df2.groupby(['X'], sort=False).sum() + df2 = pd.DataFrame({"X": ["B", "B", "A", "A"], "Y": [1, 2, 3, 4]}) + df2.groupby(["X"]).sum() + df2.groupby(["X"], sort=False).sum() Note that ``groupby`` will preserve the order in which *observations* are sorted *within* each group. @@ -191,10 +197,10 @@ For example, the groups created by ``groupby()`` below are in the order they app .. 
ipython:: python - df3 = pd.DataFrame({'X': ['A', 'B', 'A', 'B'], 'Y': [1, 4, 3, 2]}) - df3.groupby(['X']).get_group('A') + df3 = pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}) + df3.groupby(["X"]).get_group("A") - df3.groupby(['X']).get_group('B') + df3.groupby(["X"]).get_group("B") .. _groupby.dropna: @@ -236,7 +242,7 @@ above example we have: .. ipython:: python - df.groupby('A').groups + df.groupby("A").groups df.groupby(get_letter_type, axis=1).groups Calling the standard Python ``len`` function on the GroupBy object just returns @@ -244,7 +250,7 @@ the length of the ``groups`` dict, so it is largely just a convenience: .. ipython:: python - grouped = df.groupby(['A', 'B']) + grouped = df.groupby(["A", "B"]) grouped.groups len(grouped) @@ -259,15 +265,14 @@ the length of the ``groups`` dict, so it is largely just a convenience: n = 10 weight = np.random.normal(166, 20, size=n) height = np.random.normal(60, 10, size=n) - time = pd.date_range('1/1/2000', periods=n) - gender = np.random.choice(['male', 'female'], size=n) - df = pd.DataFrame({'height': height, 'weight': weight, - 'gender': gender}, index=time) + time = pd.date_range("1/1/2000", periods=n) + gender = np.random.choice(["male", "female"], size=n) + df = pd.DataFrame({"height": height, "weight": weight, "gender": gender}, index=time) .. ipython:: python df - gb = df.groupby('gender') + gb = df.groupby("gender") .. ipython:: @@ -291,9 +296,11 @@ Let's create a Series with a two-level ``MultiIndex``. .. ipython:: python - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second']) + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + index = pd.MultiIndex.from_arrays(arrays, names=["first", "second"]) s = pd.Series(np.random.randn(8), index=index) s @@ -309,7 +316,7 @@ number: .. ipython:: python - s.groupby(level='second').sum() + s.groupby(level="second").sum() The aggregation functions such as ``sum`` will take the level parameter directly. Additionally, the resulting index will be named according to the @@ -317,30 +324,32 @@ chosen level: .. ipython:: python - s.sum(level='second') + s.sum(level="second") Grouping with multiple levels is supported. .. ipython:: python :suppress: - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['doo', 'doo', 'bee', 'bee', 'bop', 'bop', 'bop', 'bop'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["doo", "doo", "bee", "bee", "bop", "bop", "bop", "bop"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] tuples = list(zip(*arrays)) - index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second', 'third']) + index = pd.MultiIndex.from_tuples(tuples, names=["first", "second", "third"]) s = pd.Series(np.random.randn(8), index=index) .. ipython:: python s - s.groupby(level=['first', 'second']).sum() + s.groupby(level=["first", "second"]).sum() Index level names may be supplied as keys. .. ipython:: python - s.groupby(['first', 'second']).sum() + s.groupby(["first", "second"]).sum() More on the ``sum`` function and aggregation later. @@ -352,14 +361,14 @@ objects. .. 
ipython:: python - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] - index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second']) + index = pd.MultiIndex.from_arrays(arrays, names=["first", "second"]) - df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 3, 3], - 'B': np.arange(8)}, - index=index) + df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 3, 3], "B": np.arange(8)}, index=index) df @@ -368,19 +377,19 @@ the ``A`` column. .. ipython:: python - df.groupby([pd.Grouper(level=1), 'A']).sum() + df.groupby([pd.Grouper(level=1), "A"]).sum() Index levels may also be specified by name. .. ipython:: python - df.groupby([pd.Grouper(level='second'), 'A']).sum() + df.groupby([pd.Grouper(level="second"), "A"]).sum() Index level names may be specified as keys directly to ``groupby``. .. ipython:: python - df.groupby(['second', 'A']).sum() + df.groupby(["second", "A"]).sum() DataFrame column selection in GroupBy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -392,24 +401,26 @@ getting a column from a DataFrame, you can do: .. ipython:: python :suppress: - df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) + df = pd.DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) .. ipython:: python - grouped = df.groupby(['A']) - grouped_C = grouped['C'] - grouped_D = grouped['D'] + grouped = df.groupby(["A"]) + grouped_C = grouped["C"] + grouped_D = grouped["D"] This is mainly syntactic sugar for the alternative and much more verbose: .. ipython:: python - df['C'].groupby(df['A']) + df["C"].groupby(df["A"]) Additionally this method avoids recomputing the internal grouping information derived from the passed key. @@ -450,13 +461,13 @@ A single group can be selected using .. ipython:: python - grouped.get_group('bar') + grouped.get_group("bar") Or for an object grouped on multiple columns: .. ipython:: python - df.groupby(['A', 'B']).get_group(('bar', 'one')) + df.groupby(["A", "B"]).get_group(("bar", "one")) .. _groupby.aggregate: @@ -474,10 +485,10 @@ An obvious one is aggregation via the .. ipython:: python - grouped = df.groupby('A') + grouped = df.groupby("A") grouped.aggregate(np.sum) - grouped = df.groupby(['A', 'B']) + grouped = df.groupby(["A", "B"]) grouped.aggregate(np.sum) As you can see, the result of the aggregation will have the group names as the @@ -487,17 +498,17 @@ changed by using the ``as_index`` option: .. ipython:: python - grouped = df.groupby(['A', 'B'], as_index=False) + grouped = df.groupby(["A", "B"], as_index=False) grouped.aggregate(np.sum) - df.groupby('A', as_index=False).sum() + df.groupby("A", as_index=False).sum() Note that you could use the ``reset_index`` DataFrame function to achieve the same result as the column names are stored in the resulting ``MultiIndex``: .. ipython:: python - df.groupby(['A', 'B']).sum().reset_index() + df.groupby(["A", "B"]).sum().reset_index() Another simple aggregation example is to compute the size of each group. This is included in GroupBy as the ``size`` method. 
It returns a Series whose @@ -559,8 +570,8 @@ aggregation with, outputting a DataFrame: .. ipython:: python - grouped = df.groupby('A') - grouped['C'].agg([np.sum, np.mean, np.std]) + grouped = df.groupby("A") + grouped["C"].agg([np.sum, np.mean, np.std]) On a grouped ``DataFrame``, you can pass a list of functions to apply to each column, which produces an aggregated result with a hierarchical index: @@ -575,19 +586,21 @@ need to rename, then you can add in a chained operation for a ``Series`` like th .. ipython:: python - (grouped['C'].agg([np.sum, np.mean, np.std]) - .rename(columns={'sum': 'foo', - 'mean': 'bar', - 'std': 'baz'})) + ( + grouped["C"] + .agg([np.sum, np.mean, np.std]) + .rename(columns={"sum": "foo", "mean": "bar", "std": "baz"}) + ) For a grouped ``DataFrame``, you can rename in a similar manner: .. ipython:: python - (grouped.agg([np.sum, np.mean, np.std]) - .rename(columns={'sum': 'foo', - 'mean': 'bar', - 'std': 'baz'})) + ( + grouped.agg([np.sum, np.mean, np.std]).rename( + columns={"sum": "foo", "mean": "bar", "std": "baz"} + ) + ) .. note:: @@ -598,7 +611,7 @@ For a grouped ``DataFrame``, you can rename in a similar manner: .. ipython:: python :okexcept: - grouped['C'].agg(['sum', 'sum']) + grouped["C"].agg(["sum", "sum"]) Pandas *does* allow you to provide multiple lambdas. In this case, pandas @@ -607,8 +620,7 @@ For a grouped ``DataFrame``, you can rename in a similar manner: .. ipython:: python - grouped['C'].agg([lambda x: x.max() - x.min(), - lambda x: x.median() - x.mean()]) + grouped["C"].agg([lambda x: x.max() - x.min(), lambda x: x.median() - x.mean()]) @@ -631,15 +643,19 @@ accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation", .. ipython:: python - animals = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'], - 'height': [9.1, 6.0, 9.5, 34.0], - 'weight': [7.9, 7.5, 9.9, 198.0]}) + animals = pd.DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) animals animals.groupby("kind").agg( - min_height=pd.NamedAgg(column='height', aggfunc='min'), - max_height=pd.NamedAgg(column='height', aggfunc='max'), - average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean), + min_height=pd.NamedAgg(column="height", aggfunc="min"), + max_height=pd.NamedAgg(column="height", aggfunc="max"), + average_weight=pd.NamedAgg(column="weight", aggfunc=np.mean), ) @@ -648,9 +664,9 @@ accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation", .. ipython:: python animals.groupby("kind").agg( - min_height=('height', 'min'), - max_height=('height', 'max'), - average_weight=('weight', np.mean), + min_height=("height", "min"), + max_height=("height", "max"), + average_weight=("weight", np.mean), ) @@ -659,9 +675,11 @@ and unpack the keyword arguments .. ipython:: python - animals.groupby("kind").agg(**{ - 'total weight': pd.NamedAgg(column='weight', aggfunc=sum), - }) + animals.groupby("kind").agg( + **{ + "total weight": pd.NamedAgg(column="weight", aggfunc=sum), + } + ) Additional keyword arguments are not passed through to the aggregation functions. Only pairs of ``(column, aggfunc)`` should be passed as ``**kwargs``. If your aggregation functions @@ -680,8 +698,8 @@ no column selection, so the values are just the functions. .. ipython:: python animals.groupby("kind").height.agg( - min_height='min', - max_height='max', + min_height="min", + max_height="max", ) Applying different functions to DataFrame columns @@ -692,8 +710,7 @@ columns of a DataFrame: .. 
ipython:: python - grouped.agg({'C': np.sum, - 'D': lambda x: np.std(x, ddof=1)}) + grouped.agg({"C": np.sum, "D": lambda x: np.std(x, ddof=1)}) The function names can also be strings. In order for a string to be valid it must be either implemented on GroupBy or available via :ref:`dispatching @@ -701,7 +718,7 @@ must be either implemented on GroupBy or available via :ref:`dispatching .. ipython:: python - grouped.agg({'C': 'sum', 'D': 'std'}) + grouped.agg({"C": "sum", "D": "std"}) .. _groupby.aggregate.cython: @@ -713,8 +730,8 @@ optimized Cython implementations: .. ipython:: python - df.groupby('A').sum() - df.groupby(['A', 'B']).mean() + df.groupby("A").sum() + df.groupby(["A", "B"]).mean() Of course ``sum`` and ``mean`` are implemented on pandas objects, so the above code would work even without the special versions via dispatching (see below). @@ -743,15 +760,14 @@ For example, suppose we wished to standardize the data within each group: .. ipython:: python - index = pd.date_range('10/1/1999', periods=1100) + index = pd.date_range("10/1/1999", periods=1100) ts = pd.Series(np.random.normal(0.5, 2, 1100), index) ts = ts.rolling(window=100, min_periods=100).mean().dropna() ts.head() ts.tail() - transformed = (ts.groupby(lambda x: x.year) - .transform(lambda x: (x - x.mean()) / x.std())) + transformed = ts.groupby(lambda x: x.year).transform(lambda x: (x - x.mean()) / x.std()) We would expect the result to now have mean 0 and standard deviation 1 within each group, which we can easily check: @@ -772,7 +788,7 @@ We can also visually compare the original and transformed data sets. .. ipython:: python - compare = pd.DataFrame({'Original': ts, 'Transformed': transformed}) + compare = pd.DataFrame({"Original": ts, "Transformed": transformed}) @savefig groupby_transform_plot.png compare.plot() @@ -788,8 +804,8 @@ Alternatively, the built-in methods could be used to produce the same outputs. .. ipython:: python - max = ts.groupby(lambda x: x.year).transform('max') - min = ts.groupby(lambda x: x.year).transform('min') + max = ts.groupby(lambda x: x.year).transform("max") + min = ts.groupby(lambda x: x.year).transform("min") max - min @@ -798,7 +814,7 @@ Another common data transform is to replace missing data with the group mean. .. ipython:: python :suppress: - cols = ['A', 'B', 'C'] + cols = ["A", "B", "C"] values = np.random.randn(1000, 3) values[np.random.randint(0, 1000, 100), 0] = np.nan values[np.random.randint(0, 1000, 50), 1] = np.nan @@ -809,7 +825,7 @@ Another common data transform is to replace missing data with the group mean. data_df - countries = np.array(['US', 'UK', 'GR', 'JP']) + countries = np.array(["US", "UK", "GR", "JP"]) key = countries[np.random.randint(0, 4, 1000)] grouped = data_df.groupby(key) @@ -859,11 +875,10 @@ the column B based on the groups of column A. .. ipython:: python - df_re = pd.DataFrame({'A': [1] * 10 + [5] * 10, - 'B': np.arange(20)}) + df_re = pd.DataFrame({"A": [1] * 10 + [5] * 10, "B": np.arange(20)}) df_re - df_re.groupby('A').rolling(4).B.mean() + df_re.groupby("A").rolling(4).B.mean() The ``expanding()`` method will accumulate a given operation @@ -872,7 +887,7 @@ group. .. ipython:: python - df_re.groupby('A').expanding().sum() + df_re.groupby("A").expanding().sum() Suppose you want to use the ``resample()`` method to get a daily @@ -881,13 +896,16 @@ missing values with the ``ffill()`` method. .. 
ipython:: python - df_re = pd.DataFrame({'date': pd.date_range(start='2016-01-01', periods=4, - freq='W'), - 'group': [1, 1, 2, 2], - 'val': [5, 6, 7, 8]}).set_index('date') + df_re = pd.DataFrame( + { + "date": pd.date_range(start="2016-01-01", periods=4, freq="W"), + "group": [1, 1, 2, 2], + "val": [5, 6, 7, 8], + } + ).set_index("date") df_re - df_re.groupby('group').resample('1D').ffill() + df_re.groupby("group").resample("1D").ffill() .. _groupby.filter: @@ -911,8 +929,8 @@ with only a couple members. .. ipython:: python - dff = pd.DataFrame({'A': np.arange(8), 'B': list('aabbbbcc')}) - dff.groupby('B').filter(lambda x: len(x) > 2) + dff = pd.DataFrame({"A": np.arange(8), "B": list("aabbbbcc")}) + dff.groupby("B").filter(lambda x: len(x) > 2) Alternatively, instead of dropping the offending groups, we can return a like-indexed object where the groups that do not pass the filter are filled with NaNs. .. ipython:: python - dff.groupby('B').filter(lambda x: len(x) > 2, dropna=False) + dff.groupby("B").filter(lambda x: len(x) > 2, dropna=False) For DataFrames with multiple columns, filters should explicitly specify a column as the filter criterion. .. ipython:: python - dff['C'] = np.arange(8) - dff.groupby('B').filter(lambda x: len(x['C']) > 2) + dff["C"] = np.arange(8) + dff.groupby("B").filter(lambda x: len(x["C"]) > 2) .. note:: @@ -939,7 +957,7 @@ For DataFrames with multiple columns, filters should explicitly specify a column .. ipython:: python - dff.groupby('B').head(2) + dff.groupby("B").head(2) .. _groupby.dispatch: @@ -953,7 +971,7 @@ functions: .. ipython:: python - grouped = df.groupby('A') + grouped = df.groupby("A") grouped.agg(lambda x: x.std()) But, it's rather verbose and can be untidy if you need to pass additional @@ -973,12 +991,14 @@ next). This enables some operations to be carried out rather succinctly: .. ipython:: python - tsdf = pd.DataFrame(np.random.randn(1000, 3), - index=pd.date_range('1/1/2000', periods=1000), - columns=['A', 'B', 'C']) + tsdf = pd.DataFrame( + np.random.randn(1000, 3), + index=pd.date_range("1/1/2000", periods=1000), + columns=["A", "B", "C"], + ) tsdf.iloc[::2] = np.nan grouped = tsdf.groupby(lambda x: x.year) - grouped.fillna(method='pad') + grouped.fillna(method="pad") In this example, we chopped the collection of time series into yearly chunks then independently called :ref:`fillna <missing_data.fillna>` on the @@ -989,7 +1009,7 @@ The ``nlargest`` and ``nsmallest`` methods work on ``Series`` style groupbys: .. ipython:: python s = pd.Series([9, 8, 7, 5, 19, 1, 4.2, 3.3]) - g = pd.Series(list('abababab')) + g = pd.Series(list("abababab")) gb = s.groupby(g) gb.nlargest(3) gb.nsmallest(3) @@ -1008,10 +1028,10 @@ for both ``aggregate`` and ``transform`` in many standard use cases. However, .. ipython:: python df - grouped = df.groupby('A') + grouped = df.groupby("A") # could also just call .describe() - grouped['C'].apply(lambda x: x.describe()) + grouped["C"].apply(lambda x: x.describe()) The dimension of the returned result can also change: .. ipython:: python @@ -1032,7 +1052,8 @@ that is itself a series, and possibly upcast the result to a DataFrame: .. ipython:: python def f(x): - return pd.Series([x, x ** 2], index=['x', 'x^2']) + return pd.Series([x, x ** 2], index=["x", "x^2"]) + s = pd.Series(np.random.rand(5)) s @@ -1133,7 +1154,7 @@ will be (silently) dropped. Thus, this does not pose any problems: .. 
ipython:: python - df.groupby('A').std() + df.groupby("A").std() Note that ``df.groupby('A').colname.std()`` is more efficient than ``df.groupby('A').std().colname``, so if the result of an aggregation function is only interesting over one column (here ``colname``), it may be filtered @@ -1151,23 +1172,29 @@ is only interesting over one column (here ``colname``), it may be filtered .. ipython:: python from decimal import Decimal + df_dec = pd.DataFrame( - {'id': [1, 2, 1, 2], - 'int_column': [1, 2, 3, 4], - 'dec_column': [Decimal('0.50'), Decimal('0.15'), - Decimal('0.25'), Decimal('0.40')] - } + { + "id": [1, 2, 1, 2], + "int_column": [1, 2, 3, 4], + "dec_column": [ + Decimal("0.50"), + Decimal("0.15"), + Decimal("0.25"), + Decimal("0.40"), + ], + } ) # Decimal columns can be sum'd explicitly by themselves... - df_dec.groupby(['id'])[['dec_column']].sum() + df_dec.groupby(["id"])[["dec_column"]].sum() # ...but cannot be combined with standard data types or they will be excluded - df_dec.groupby(['id'])[['int_column', 'dec_column']].sum() + df_dec.groupby(["id"])[["int_column", "dec_column"]].sum() # Use .agg function to aggregate over standard and "nuisance" data types # at the same time - df_dec.groupby(['id']).agg({'int_column': 'sum', 'dec_column': 'sum'}) + df_dec.groupby(["id"]).agg({"int_column": "sum", "dec_column": "sum"}) .. _groupby.observed: @@ -1182,25 +1209,27 @@ Show all values: .. ipython:: python - pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], - categories=['a', 'b']), - observed=False).count() + pd.Series([1, 1, 1]).groupby( + pd.Categorical(["a", "a", "a"], categories=["a", "b"]), observed=False + ).count() Show only the observed values: .. ipython:: python - pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], - categories=['a', 'b']), - observed=True).count() + pd.Series([1, 1, 1]).groupby( + pd.Categorical(["a", "a", "a"], categories=["a", "b"]), observed=True + ).count() The returned dtype of the grouped will *always* include *all* of the categories that were grouped. .. ipython:: python - s = pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], - categories=['a', 'b']), - observed=False).count() + s = ( + pd.Series([1, 1, 1]) + .groupby(pd.Categorical(["a", "a", "a"], categories=["a", "b"]), observed=False) + .count() + ) s.index.dtype .. _groupby.missing: @@ -1224,7 +1253,7 @@ can be used as group keys. If so, the order of the levels will be preserved: data = pd.Series(np.random.randn(100)) - factor = pd.qcut(data, [0, .25, .5, .75, 1.]) + factor = pd.qcut(data, [0, 0.25, 0.5, 0.75, 1.0]) data.groupby(factor).mean() @@ -1240,19 +1269,23 @@ use the ``pd.Grouper`` to provide this local control. 
import datetime - df = pd.DataFrame({'Branch': 'A A A A A A A B'.split(), - 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), - 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], - 'Date': [ - datetime.datetime(2013, 1, 1, 13, 0), - datetime.datetime(2013, 1, 1, 13, 5), - datetime.datetime(2013, 10, 1, 20, 0), - datetime.datetime(2013, 10, 2, 10, 0), - datetime.datetime(2013, 10, 1, 20, 0), - datetime.datetime(2013, 10, 2, 10, 0), - datetime.datetime(2013, 12, 2, 12, 0), - datetime.datetime(2013, 12, 2, 14, 0)] - }) + df = pd.DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime.datetime(2013, 1, 1, 13, 0), + datetime.datetime(2013, 1, 1, 13, 5), + datetime.datetime(2013, 10, 1, 20, 0), + datetime.datetime(2013, 10, 2, 10, 0), + datetime.datetime(2013, 10, 1, 20, 0), + datetime.datetime(2013, 10, 2, 10, 0), + datetime.datetime(2013, 12, 2, 12, 0), + datetime.datetime(2013, 12, 2, 14, 0), + ], + } + ) df @@ -1260,18 +1293,18 @@ Groupby a specific column with the desired frequency. This is like resampling. .. ipython:: python - df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer']).sum() + df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"]).sum() You have an ambiguous specification in that you have a named index and a column that could be potential groupers. .. ipython:: python - df = df.set_index('Date') - df['Date'] = df.index + pd.offsets.MonthEnd(2) - df.groupby([pd.Grouper(freq='6M', key='Date'), 'Buyer']).sum() + df = df.set_index("Date") + df["Date"] = df.index + pd.offsets.MonthEnd(2) + df.groupby([pd.Grouper(freq="6M", key="Date"), "Buyer"]).sum() - df.groupby([pd.Grouper(freq='6M', level='Date'), 'Buyer']).sum() + df.groupby([pd.Grouper(freq="6M", level="Date"), "Buyer"]).sum() Taking the first rows of each group @@ -1281,10 +1314,10 @@ Just like for a DataFrame or Series you can call head and tail on a groupby: .. ipython:: python - df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) + df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) df - g = df.groupby('A') + g = df.groupby("A") g.head(1) g.tail(1) @@ -1302,8 +1335,8 @@ will return a single row (or no row) per group if you pass an int for n: .. ipython:: python - df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") g.nth(0) g.nth(-1) @@ -1314,21 +1347,21 @@ If you want to select the nth not-null item, use the ``dropna`` kwarg. For a Dat .. ipython:: python # nth(0) is the same as g.first() - g.nth(0, dropna='any') + g.nth(0, dropna="any") g.first() # nth(-1) is the same as g.last() - g.nth(-1, dropna='any') # NaNs denote group exhausted when using dropna + g.nth(-1, dropna="any") # NaNs denote group exhausted when using dropna g.last() - g.B.nth(0, dropna='all') + g.B.nth(0, dropna="all") As with other methods, passing ``as_index=False``, will achieve a filtration, which returns the grouped row. .. ipython:: python - df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A', as_index=False) + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A", as_index=False) g.nth(0) g.nth(-1) @@ -1337,8 +1370,8 @@ You can also select multiple rows from each group by specifying multiple nth val .. 
ipython:: python - business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', freq='B') - df = pd.DataFrame(1, index=business_dates, columns=['a', 'b']) + business_dates = pd.date_range(start="4/1/2014", end="6/30/2014", freq="B") + df = pd.DataFrame(1, index=business_dates, columns=["a", "b"]) # get the first, 4th, and last date index for each month df.groupby([df.index.year, df.index.month]).nth([0, 3, -1]) @@ -1350,12 +1383,12 @@ To see the order in which each row appears within its group, use the .. ipython:: python - dfg = pd.DataFrame(list('aaabba'), columns=['A']) + dfg = pd.DataFrame(list("aaabba"), columns=["A"]) dfg - dfg.groupby('A').cumcount() + dfg.groupby("A").cumcount() - dfg.groupby('A').cumcount(ascending=False) + dfg.groupby("A").cumcount(ascending=False) .. _groupby.ngroup: @@ -1374,12 +1407,12 @@ order they are first observed. .. ipython:: python - dfg = pd.DataFrame(list('aaabba'), columns=['A']) + dfg = pd.DataFrame(list("aaabba"), columns=["A"]) dfg - dfg.groupby('A').ngroup() + dfg.groupby("A").ngroup() - dfg.groupby('A').ngroup(ascending=False) + dfg.groupby("A").ngroup(ascending=False) Plotting ~~~~~~~~ @@ -1392,8 +1425,8 @@ the values in column 1 where the group is "B" are 3 higher on average. np.random.seed(1234) df = pd.DataFrame(np.random.randn(50, 2)) - df['g'] = np.random.choice(['A', 'B'], size=50) - df.loc[df['g'] == 'B', 1] += 3 + df["g"] = np.random.choice(["A", "B"], size=50) + df.loc[df["g"] == "B", 1] += 3 We can easily visualize this with a boxplot: @@ -1401,7 +1434,7 @@ We can easily visualize this with a boxplot: :okwarning: @savefig groupby_boxplot.png - df.groupby('g').boxplot() + df.groupby("g").boxplot() The result of calling ``boxplot`` is a dictionary whose keys are the values of our grouping column ``g`` ("A" and "B"). The values of the resulting dictionary @@ -1436,20 +1469,26 @@ code more readable. First we set the data: .. ipython:: python n = 1000 - df = pd.DataFrame({'Store': np.random.choice(['Store_1', 'Store_2'], n), - 'Product': np.random.choice(['Product_1', - 'Product_2'], n), - 'Revenue': (np.random.random(n) * 50 + 10).round(2), - 'Quantity': np.random.randint(1, 10, size=n)}) + df = pd.DataFrame( + { + "Store": np.random.choice(["Store_1", "Store_2"], n), + "Product": np.random.choice(["Product_1", "Product_2"], n), + "Revenue": (np.random.random(n) * 50 + 10).round(2), + "Quantity": np.random.randint(1, 10, size=n), + } + ) df.head(2) Now, to find prices per store/product, we can simply do: .. ipython:: python - (df.groupby(['Store', 'Product']) - .pipe(lambda grp: grp.Revenue.sum() / grp.Quantity.sum()) - .unstack().round(2)) + ( + df.groupby(["Store", "Product"]) + .pipe(lambda grp: grp.Revenue.sum() / grp.Quantity.sum()) + .unstack() + .round(2) + ) Piping can also be expressive when you want to deliver a grouped object to some arbitrary function, for example: @@ -1459,7 +1498,8 @@ arbitrary function, for example: def mean(groupby): return groupby.mean() - df.groupby(['Store', 'Product']).pipe(mean) + + df.groupby(["Store", "Product"]).pipe(mean) where ``mean`` takes a GroupBy object and finds the mean of the Revenue and Quantity columns respectively for each Store-Product combination. The ``mean`` function can @@ -1476,8 +1516,7 @@ Regroup columns of a DataFrame according to their sum, and sum the aggregated on .. 
ipython:: python - df = pd.DataFrame({'a': [1, 0, 0], 'b': [0, 1, 0], - 'c': [1, 0, 0], 'd': [2, 3, 4]}) + df = pd.DataFrame({"a": [1, 0, 0], "b": [0, 1, 0], "c": [1, 0, 0], "d": [2, 3, 4]}) df df.groupby(df.sum(), axis=1).sum() @@ -1536,16 +1575,22 @@ column index name will be used as the name of the inserted column: .. ipython:: python - df = pd.DataFrame({'a': [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2], - 'b': [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1], - 'c': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], - 'd': [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1]}) + df = pd.DataFrame( + { + "a": [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1], + "c": [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], + "d": [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1], + } + ) + def compute_metrics(x): - result = {'b_sum': x['b'].sum(), 'c_mean': x['c'].mean()} - return pd.Series(result, name='metrics') + result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()} + return pd.Series(result, name="metrics") + - result = df.groupby('a').apply(compute_metrics) + result = df.groupby("a").apply(compute_metrics) result From bcbac2a3d1adb95afc8cbb961baf29d26a0c5289 Mon Sep 17 00:00:00 2001 From: Erfan Nariman <34067903+erfannariman@users.noreply.github.com> Date: Sat, 3 Oct 2020 19:35:01 +0200 Subject: [PATCH 2/2] Update doc/source/user_guide/groupby.rst Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/user_guide/groupby.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 9696f14f03b56..ec64442319a84 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -677,7 +677,7 @@ and unpack the keyword arguments animals.groupby("kind").agg( **{ - "total weight": pd.NamedAgg(column="weight", aggfunc=sum), + "total weight": pd.NamedAgg(column="weight", aggfunc=sum) } )