From 4f60f72e8b75b512ca6e5c214789816051178265 Mon Sep 17 00:00:00 2001 From: PrayagS Date: Sat, 3 Oct 2020 00:42:59 +0530 Subject: [PATCH 1/3] DOC: use black to fix code style in doc pandas-dev#36777 --- .../comparison/comparison_with_r.rst | 153 +++++++++++------- .../comparison/comparison_with_sas.rst | 130 +++++++-------- .../comparison/comparison_with_sql.rst | 110 +++++++------ .../comparison/comparison_with_stata.rst | 120 +++++++------- 4 files changed, 289 insertions(+), 224 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst index e1a4cfe49b7d1..123b911a97aaa 100644 --- a/doc/source/getting_started/comparison/comparison_with_r.rst +++ b/doc/source/getting_started/comparison/comparison_with_r.rst @@ -122,16 +122,16 @@ Selecting multiple columns by name in ``pandas`` is straightforward .. ipython:: python - df = pd.DataFrame(np.random.randn(10, 3), columns=list('abc')) - df[['a', 'c']] - df.loc[:, ['a', 'c']] + df = pd.DataFrame(np.random.randn(10, 3), columns=list("abc")) + df[["a", "c"]] + df.loc[:, ["a", "c"]] Selecting multiple noncontiguous columns by integer location can be achieved with a combination of the ``iloc`` indexer attribute and ``numpy.r_``. .. ipython:: python - named = list('abcdefg') + named = list("abcdefg") n = 30 columns = named + np.arange(len(named), n).tolist() df = pd.DataFrame(np.random.randn(n, n), columns=columns) @@ -160,14 +160,29 @@ function. .. ipython:: python df = pd.DataFrame( - {'v1': [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], - 'v2': [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], - 'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], - 'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, np.nan, - np.nan]}) - - g = df.groupby(['by1', 'by2']) - g[['v1', 'v2']].mean() + { + "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], + "by2": [ + "wet", + "dry", + 99, + 95, + np.nan, + "damp", + 95, + 99, + "red", + 99, + np.nan, + np.nan, + ], + } + ) + + g = df.groupby(["by1", "by2"]) + g[["v1", "v2"]].mean() For more details and examples see :ref:`the groupby documentation `. @@ -228,11 +243,14 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this: import string baseball = pd.DataFrame( - {'team': ["team %d" % (x + 1) for x in range(5)] * 5, - 'player': random.sample(list(string.ascii_lowercase), 25), - 'batting avg': np.random.uniform(.200, .400, 25)}) + { + "team": ["team %d" % (x + 1) for x in range(5)] * 5, + "player": random.sample(list(string.ascii_lowercase), 25), + "batting avg": np.random.uniform(0.200, 0.400, 25), + } + ) - baseball.pivot_table(values='batting avg', columns='team', aggfunc=np.max) + baseball.pivot_table(values="batting avg", columns="team", aggfunc=np.max) For more details and examples see :ref:`the reshaping documentation `. @@ -256,10 +274,10 @@ index/slice as well as standard boolean indexing: .. ipython:: python - df = pd.DataFrame({'a': np.random.randn(10), 'b': np.random.randn(10)}) - df.query('a <= b') - df[df['a'] <= df['b']] - df.loc[df['a'] <= df['b']] + df = pd.DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.query("a <= b") + df[df["a"] <= df["b"]] + df.loc[df["a"] <= df["b"]] For more details and examples see :ref:`the query documentation `. 
@@ -282,9 +300,9 @@ In ``pandas`` the equivalent expression, using the .. ipython:: python - df = pd.DataFrame({'a': np.random.randn(10), 'b': np.random.randn(10)}) - df.eval('a + b') - df['a'] + df['b'] # same as the previous expression + df = pd.DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.eval("a + b") + df["a"] + df["b"] # same as the previous expression In certain cases :meth:`~pandas.DataFrame.eval` will be much faster than evaluation in pure Python. For more details and examples see :ref:`the eval @@ -334,14 +352,18 @@ In ``pandas`` the equivalent expression, using the .. ipython:: python - df = pd.DataFrame({'x': np.random.uniform(1., 168., 120), - 'y': np.random.uniform(7., 334., 120), - 'z': np.random.uniform(1.7, 20.7, 120), - 'month': [5, 6, 7, 8] * 30, - 'week': np.random.randint(1, 4, 120)}) + df = pd.DataFrame( + { + "x": np.random.uniform(1.0, 168.0, 120), + "y": np.random.uniform(7.0, 334.0, 120), + "z": np.random.uniform(1.7, 20.7, 120), + "month": [5, 6, 7, 8] * 30, + "week": np.random.randint(1, 4, 120), + } + ) - grouped = df.groupby(['month', 'week']) - grouped['x'].agg([np.mean, np.std]) + grouped = df.groupby(["month", "week"]) + grouped["x"].agg([np.mean, np.std]) For more details and examples see :ref:`the groupby documentation @@ -410,13 +432,17 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent: .. ipython:: python - cheese = pd.DataFrame({'first': ['John', 'Mary'], - 'last': ['Doe', 'Bo'], - 'height': [5.5, 6.0], - 'weight': [130, 150]}) + cheese = pd.DataFrame( + { + "first": ["John", "Mary"], + "last": ["Doe", "Bo"], + "height": [5.5, 6.0], + "weight": [130, 150], + } + ) - pd.melt(cheese, id_vars=['first', 'last']) - cheese.set_index(['first', 'last']).stack() # alternative way + pd.melt(cheese, id_vars=["first", "last"]) + cheese.set_index(["first", "last"]).stack() # alternative way For more details and examples see :ref:`the reshaping documentation `. @@ -444,15 +470,24 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`: .. ipython:: python - df = pd.DataFrame({'x': np.random.uniform(1., 168., 12), - 'y': np.random.uniform(7., 334., 12), - 'z': np.random.uniform(1.7, 20.7, 12), - 'month': [5, 6, 7] * 4, - 'week': [1, 2] * 6}) - - mdf = pd.melt(df, id_vars=['month', 'week']) - pd.pivot_table(mdf, values='value', index=['variable', 'week'], - columns=['month'], aggfunc=np.mean) + df = pd.DataFrame( + { + "x": np.random.uniform(1.0, 168.0, 12), + "y": np.random.uniform(7.0, 334.0, 12), + "z": np.random.uniform(1.7, 20.7, 12), + "month": [5, 6, 7] * 4, + "week": [1, 2] * 6, + } + ) + + mdf = pd.melt(df, id_vars=["month", "week"]) + pd.pivot_table( + mdf, + values="value", + index=["variable", "week"], + columns=["month"], + aggfunc=np.mean, + ) Similarly for ``dcast`` which uses a data.frame called ``df`` in R to aggregate information based on ``Animal`` and ``FeedType``: @@ -475,21 +510,29 @@ using :meth:`~pandas.pivot_table`: .. 
ipython:: python - df = pd.DataFrame({ - 'Animal': ['Animal1', 'Animal2', 'Animal3', 'Animal2', 'Animal1', - 'Animal2', 'Animal3'], - 'FeedType': ['A', 'B', 'A', 'A', 'B', 'B', 'A'], - 'Amount': [10, 7, 4, 2, 5, 6, 2], - }) - - df.pivot_table(values='Amount', index='Animal', columns='FeedType', - aggfunc='sum') + df = pd.DataFrame( + { + "Animal": [ + "Animal1", + "Animal2", + "Animal3", + "Animal2", + "Animal1", + "Animal2", + "Animal3", + ], + "FeedType": ["A", "B", "A", "A", "B", "B", "A"], + "Amount": [10, 7, 4, 2, 5, 6, 2], + } + ) + + df.pivot_table(values="Amount", index="Animal", columns="FeedType", aggfunc="sum") The second approach is to use the :meth:`~pandas.DataFrame.groupby` method: .. ipython:: python - df.groupby(['Animal', 'FeedType'])['Amount'].sum() + df.groupby(["Animal", "FeedType"])["Amount"].sum() For more details and examples see :ref:`the reshaping documentation ` or :ref:`the groupby documentation`. diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst index 85c6ea2c31969..381558b1359f7 100644 --- a/doc/source/getting_started/comparison/comparison_with_sas.rst +++ b/doc/source/getting_started/comparison/comparison_with_sas.rst @@ -106,7 +106,7 @@ and the values are the data. .. ipython:: python - df = pd.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]}) + df = pd.DataFrame({"x": [1, 3, 5], "y": [2, 4, 6]}) df @@ -130,10 +130,12 @@ The pandas method is :func:`read_csv`, which works similarly. .. ipython:: python - url = ('https://raw.github.com/pandas-dev/' - 'pandas/master/pandas/tests/io/data/csv/tips.csv') - tips = pd.read_csv(url) - tips.head() + url = ( + "https://raw.github.com/pandas-dev/" + "pandas/master/pandas/tests/io/data/csv/tips.csv" + ) + tips = pd.read_csv(url) + tips.head() Like ``PROC IMPORT``, ``read_csv`` can take a number of parameters to specify @@ -142,10 +144,10 @@ and did not have column names, the pandas command would be: .. code-block:: python - tips = pd.read_csv('tips.csv', sep='\t', header=None) + tips = pd.read_csv("tips.csv", sep="\t", header=None) # alternatively, read_table is an alias to read_csv with tab delimiter - tips = pd.read_table('tips.csv', header=None) + tips = pd.read_table("tips.csv", header=None) In addition to text/csv, pandas supports a variety of other data formats such as Excel, HDF5, and SQL databases. These are all read via a ``pd.read_*`` @@ -166,7 +168,7 @@ and other data formats follow a similar api. .. code-block:: python - tips.to_csv('tips2.csv') + tips.to_csv("tips2.csv") Data operations @@ -192,14 +194,14 @@ New columns can be assigned in the same way. .. ipython:: python - tips['total_bill'] = tips['total_bill'] - 2 - tips['new_bill'] = tips['total_bill'] / 2.0 + tips["total_bill"] = tips["total_bill"] - 2 + tips["new_bill"] = tips["total_bill"] / 2.0 tips.head() .. ipython:: python :suppress: - tips = tips.drop('new_bill', axis=1) + tips = tips.drop("new_bill", axis=1) Filtering ~~~~~~~~~ @@ -226,7 +228,7 @@ DataFrames can be filtered in multiple ways; the most intuitive of which is usin .. ipython:: python - tips[tips['total_bill'] > 10].head() + tips[tips["total_bill"] > 10].head() If/then logic ~~~~~~~~~~~~~ @@ -248,13 +250,13 @@ the ``where`` method from ``numpy``. .. ipython:: python - tips['bucket'] = np.where(tips['total_bill'] < 10, 'low', 'high') + tips["bucket"] = np.where(tips["total_bill"] < 10, "low", "high") tips.head() .. 
ipython:: python :suppress: - tips = tips.drop('bucket', axis=1) + tips = tips.drop("bucket", axis=1) Date functionality ~~~~~~~~~~~~~~~~~~ @@ -284,22 +286,26 @@ see the :ref:`timeseries documentation` for more details. .. ipython:: python - tips['date1'] = pd.Timestamp('2013-01-15') - tips['date2'] = pd.Timestamp('2015-02-15') - tips['date1_year'] = tips['date1'].dt.year - tips['date2_month'] = tips['date2'].dt.month - tips['date1_next'] = tips['date1'] + pd.offsets.MonthBegin() - tips['months_between'] = ( - tips['date2'].dt.to_period('M') - tips['date1'].dt.to_period('M')) + tips["date1"] = pd.Timestamp("2013-01-15") + tips["date2"] = pd.Timestamp("2015-02-15") + tips["date1_year"] = tips["date1"].dt.year + tips["date2_month"] = tips["date2"].dt.month + tips["date1_next"] = tips["date1"] + pd.offsets.MonthBegin() + tips["months_between"] = tips["date2"].dt.to_period("M") - tips[ + "date1" + ].dt.to_period("M") - tips[['date1', 'date2', 'date1_year', 'date2_month', - 'date1_next', 'months_between']].head() + tips[ + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] + ].head() .. ipython:: python :suppress: - tips = tips.drop(['date1', 'date2', 'date1_year', - 'date2_month', 'date1_next', 'months_between'], axis=1) + tips = tips.drop( + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], + axis=1, + ) Selection of columns ~~~~~~~~~~~~~~~~~~~~ @@ -329,13 +335,13 @@ The same operations are expressed in pandas below. .. ipython:: python # keep - tips[['sex', 'total_bill', 'tip']].head() + tips[["sex", "total_bill", "tip"]].head() # drop - tips.drop('sex', axis=1).head() + tips.drop("sex", axis=1).head() # rename - tips.rename(columns={'total_bill': 'total_bill_2'}).head() + tips.rename(columns={"total_bill": "total_bill_2"}).head() Sorting by values @@ -354,7 +360,7 @@ takes a list of columns to sort by. .. ipython:: python - tips = tips.sort_values(['sex', 'total_bill']) + tips = tips.sort_values(["sex", "total_bill"]) tips.head() @@ -383,8 +389,8 @@ trailing blanks. .. ipython:: python - tips['time'].str.len().head() - tips['time'].str.rstrip().str.len().head() + tips["time"].str.len().head() + tips["time"].str.rstrip().str.len().head() Find @@ -410,7 +416,7 @@ the function will return -1 if it fails to find the substring. .. ipython:: python - tips['sex'].str.find("ale").head() + tips["sex"].str.find("ale").head() Substring @@ -432,7 +438,7 @@ indexes are zero-based. .. ipython:: python - tips['sex'].str[0:1].head() + tips["sex"].str[0:1].head() Scan @@ -460,9 +466,9 @@ approaches, but this just shows a simple approach. .. ipython:: python - firstlast = pd.DataFrame({'String': ['John Smith', 'Jane Cook']}) - firstlast['First_Name'] = firstlast['String'].str.split(" ", expand=True)[0] - firstlast['Last_Name'] = firstlast['String'].str.rsplit(" ", expand=True)[0] + firstlast = pd.DataFrame({"String": ["John Smith", "Jane Cook"]}) + firstlast["First_Name"] = firstlast["String"].str.split(" ", expand=True)[0] + firstlast["Last_Name"] = firstlast["String"].str.rsplit(" ", expand=True)[0] firstlast @@ -491,10 +497,10 @@ The equivalent Python functions are ``upper``, ``lower``, and ``title``. .. 
ipython:: python - firstlast = pd.DataFrame({'String': ['John Smith', 'Jane Cook']}) - firstlast['string_up'] = firstlast['String'].str.upper() - firstlast['string_low'] = firstlast['String'].str.lower() - firstlast['string_prop'] = firstlast['String'].str.title() + firstlast = pd.DataFrame({"String": ["John Smith", "Jane Cook"]}) + firstlast["string_up"] = firstlast["String"].str.upper() + firstlast["string_low"] = firstlast["String"].str.lower() + firstlast["string_prop"] = firstlast["String"].str.title() firstlast Merging @@ -504,11 +510,9 @@ The following tables will be used in the merge examples .. ipython:: python - df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], - 'value': np.random.randn(4)}) + df1 = pd.DataFrame({"key": ["A", "B", "C", "D"], "value": np.random.randn(4)}) df1 - df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], - 'value': np.random.randn(4)}) + df2 = pd.DataFrame({"key": ["B", "D", "D", "E"], "value": np.random.randn(4)}) df2 In SAS, data must be explicitly sorted before merging. Different @@ -542,16 +546,16 @@ types are accomplished via the ``how`` keyword. .. ipython:: python - inner_join = df1.merge(df2, on=['key'], how='inner') + inner_join = df1.merge(df2, on=["key"], how="inner") inner_join - left_join = df1.merge(df2, on=['key'], how='left') + left_join = df1.merge(df2, on=["key"], how="left") left_join - right_join = df1.merge(df2, on=['key'], how='right') + right_join = df1.merge(df2, on=["key"], how="right") right_join - outer_join = df1.merge(df2, on=['key'], how='outer') + outer_join = df1.merge(df2, on=["key"], how="outer") outer_join @@ -566,8 +570,8 @@ operations, and is ignored by default for aggregations. .. ipython:: python outer_join - outer_join['value_x'] + outer_join['value_y'] - outer_join['value_x'].sum() + outer_join["value_x"] + outer_join["value_y"] + outer_join["value_x"].sum() One difference is that missing data cannot be compared to its sentinel value. For example, in SAS you could do this to filter missing values. @@ -589,8 +593,8 @@ should be used for comparisons. .. ipython:: python - outer_join[pd.isna(outer_join['value_x'])] - outer_join[pd.notna(outer_join['value_x'])] + outer_join[pd.isna(outer_join["value_x"])] + outer_join[pd.notna(outer_join["value_x"])] pandas also provides a variety of methods to work with missing data - some of which would be challenging to express in SAS. For example, there are methods to @@ -601,8 +605,8 @@ value, like the mean, or forward filling from previous rows. See the .. ipython:: python outer_join.dropna() - outer_join.fillna(method='ffill') - outer_join['value_x'].fillna(outer_join['value_x'].mean()) + outer_join.fillna(method="ffill") + outer_join["value_x"].fillna(outer_join["value_x"].mean()) GroupBy @@ -629,7 +633,7 @@ for more details and examples. .. ipython:: python - tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum() + tips_summed = tips.groupby(["sex", "smoker"])[["total_bill", "tip"]].sum() tips_summed.head() @@ -666,8 +670,8 @@ operation. .. ipython:: python - gb = tips.groupby('smoker')['total_bill'] - tips['adj_total_bill'] = tips['total_bill'] - gb.transform('mean') + gb = tips.groupby("smoker")["total_bill"] + tips["adj_total_bill"] = tips["total_bill"] - gb.transform("mean") tips.head() @@ -695,7 +699,7 @@ In pandas this would be written as: .. ipython:: python - tips.groupby(['sex', 'smoker']).first() + tips.groupby(["sex", "smoker"]).first() Other considerations @@ -729,16 +733,16 @@ the XPORT or SAS7BDAT binary format. .. 
code-block:: python - df = pd.read_sas('transport-file.xpt') - df = pd.read_sas('binary-file.sas7bdat') + df = pd.read_sas("transport-file.xpt") + df = pd.read_sas("binary-file.sas7bdat") You can also specify the file format directly. By default, pandas will try to infer the file format based on its extension. .. code-block:: python - df = pd.read_sas('transport-file.xpt', format='xport') - df = pd.read_sas('binary-file.sas7bdat', format='sas7bdat') + df = pd.read_sas("transport-file.xpt", format="xport") + df = pd.read_sas("binary-file.sas7bdat", format="sas7bdat") XPORT is a relatively limited format and the parsing of it is not as optimized as some of the other pandas readers. An alternative way @@ -752,4 +756,4 @@ to interop data between SAS and pandas is to serialize to csv. Wall time: 14.6 s In [9]: %time df = pd.read_csv('big.csv') - Wall time: 4.86 s \ No newline at end of file + Wall time: 4.86 s diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index 04f97a27cde39..6848d8df2e46b 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -24,8 +24,10 @@ structure. .. ipython:: python - url = ('https://raw.github.com/pandas-dev' - '/pandas/master/pandas/tests/io/data/csv/tips.csv') + url = ( + "https://raw.github.com/pandas-dev" + "/pandas/master/pandas/tests/io/data/csv/tips.csv" + ) tips = pd.read_csv(url) tips.head() @@ -44,7 +46,7 @@ With pandas, column selection is done by passing a list of column names to your .. ipython:: python - tips[['total_bill', 'tip', 'smoker', 'time']].head(5) + tips[["total_bill", "tip", "smoker", "time"]].head(5) Calling the DataFrame without the list of column names would display all columns (akin to SQL's ``*``). @@ -61,7 +63,7 @@ With pandas, you can use the :meth:`DataFrame.assign` method of a DataFrame to a .. ipython:: python - tips.assign(tip_rate=tips['tip'] / tips['total_bill']).head(5) + tips.assign(tip_rate=tips["tip"] / tips["total_bill"]).head(5) WHERE ----- @@ -79,14 +81,14 @@ DataFrames can be filtered in multiple ways; the most intuitive of which is usin .. ipython:: python - tips[tips['time'] == 'Dinner'].head(5) + tips[tips["time"] == "Dinner"].head(5) The above statement is simply passing a ``Series`` of True/False objects to the DataFrame, returning all rows with True. .. ipython:: python - is_dinner = tips['time'] == 'Dinner' + is_dinner = tips["time"] == "Dinner" is_dinner.value_counts() tips[is_dinner].head(5) @@ -103,7 +105,7 @@ Just like SQL's OR and AND, multiple conditions can be passed to a DataFrame usi .. ipython:: python # tips of more than $5.00 at Dinner meals - tips[(tips['time'] == 'Dinner') & (tips['tip'] > 5.00)] + tips[(tips["time"] == "Dinner") & (tips["tip"] > 5.00)] .. code-block:: sql @@ -115,15 +117,16 @@ Just like SQL's OR and AND, multiple conditions can be passed to a DataFrame usi .. ipython:: python # tips by parties of at least 5 diners OR bill total was more than $45 - tips[(tips['size'] >= 5) | (tips['total_bill'] > 45)] + tips[(tips["size"] >= 5) | (tips["total_bill"] > 45)] NULL checking is done using the :meth:`~pandas.Series.notna` and :meth:`~pandas.Series.isna` methods. .. 
ipython:: python - frame = pd.DataFrame({'col1': ['A', 'B', np.NaN, 'C', 'D'], - 'col2': ['F', np.NaN, 'G', 'H', 'I']}) + frame = pd.DataFrame( + {"col1": ["A", "B", np.NaN, "C", "D"], "col2": ["F", np.NaN, "G", "H", "I"]} + ) frame Assume we have a table of the same structure as our DataFrame above. We can see only the records @@ -137,7 +140,7 @@ where ``col2`` IS NULL with the following query: .. ipython:: python - frame[frame['col2'].isna()] + frame[frame["col2"].isna()] Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.notna`. @@ -149,7 +152,7 @@ Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series. .. ipython:: python - frame[frame['col1'].notna()] + frame[frame["col1"].notna()] GROUP BY @@ -177,7 +180,7 @@ The pandas equivalent would be: .. ipython:: python - tips.groupby('sex').size() + tips.groupby("sex").size() Notice that in the pandas code we used :meth:`~pandas.core.groupby.DataFrameGroupBy.size` and not :meth:`~pandas.core.groupby.DataFrameGroupBy.count`. This is because @@ -186,14 +189,14 @@ the number of ``not null`` records within each. .. ipython:: python - tips.groupby('sex').count() + tips.groupby("sex").count() Alternatively, we could have applied the :meth:`~pandas.core.groupby.DataFrameGroupBy.count` method to an individual column: .. ipython:: python - tips.groupby('sex')['total_bill'].count() + tips.groupby("sex")["total_bill"].count() Multiple functions can also be applied at once. For instance, say we'd like to see how tip amount differs by day of the week - :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` allows you to pass a dictionary @@ -213,7 +216,7 @@ to your grouped DataFrame, indicating which functions to apply to specific colum .. ipython:: python - tips.groupby('day').agg({'tip': np.mean, 'day': np.size}) + tips.groupby("day").agg({"tip": np.mean, "day": np.size}) Grouping by more than one column is done by passing a list of columns to the :meth:`~pandas.DataFrame.groupby` method. @@ -237,7 +240,7 @@ Grouping by more than one column is done by passing a list of columns to the .. ipython:: python - tips.groupby(['smoker', 'day']).agg({'tip': [np.size, np.mean]}) + tips.groupby(["smoker", "day"]).agg({"tip": [np.size, np.mean]}) .. _compare_with_sql.join: @@ -250,10 +253,8 @@ columns to join on (column names or indices). .. ipython:: python - df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], - 'value': np.random.randn(4)}) - df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], - 'value': np.random.randn(4)}) + df1 = pd.DataFrame({"key": ["A", "B", "C", "D"], "value": np.random.randn(4)}) + df2 = pd.DataFrame({"key": ["B", "D", "D", "E"], "value": np.random.randn(4)}) Assume we have two database tables of the same name and structure as our DataFrames. @@ -271,15 +272,15 @@ INNER JOIN .. ipython:: python # merge performs an INNER JOIN by default - pd.merge(df1, df2, on='key') + pd.merge(df1, df2, on="key") :meth:`~pandas.merge` also offers parameters for cases when you'd like to join one DataFrame's column with another DataFrame's index. .. ipython:: python - indexed_df2 = df2.set_index('key') - pd.merge(df1, indexed_df2, left_on='key', right_index=True) + indexed_df2 = df2.set_index("key") + pd.merge(df1, indexed_df2, left_on="key", right_index=True) LEFT OUTER JOIN ~~~~~~~~~~~~~~~ @@ -294,7 +295,7 @@ LEFT OUTER JOIN .. ipython:: python # show all records from df1 - pd.merge(df1, df2, on='key', how='left') + pd.merge(df1, df2, on="key", how="left") RIGHT JOIN ~~~~~~~~~~ @@ -309,7 +310,7 @@ RIGHT JOIN .. 
ipython:: python # show all records from df2 - pd.merge(df1, df2, on='key', how='right') + pd.merge(df1, df2, on="key", how="right") FULL JOIN ~~~~~~~~~ @@ -327,7 +328,7 @@ joined columns find a match. As of writing, FULL JOINs are not supported in all .. ipython:: python # show all records from both frames - pd.merge(df1, df2, on='key', how='outer') + pd.merge(df1, df2, on="key", how="outer") UNION @@ -336,10 +337,12 @@ UNION ALL can be performed using :meth:`~pandas.concat`. .. ipython:: python - df1 = pd.DataFrame({'city': ['Chicago', 'San Francisco', 'New York City'], - 'rank': range(1, 4)}) - df2 = pd.DataFrame({'city': ['Chicago', 'Boston', 'Los Angeles'], - 'rank': [1, 4, 5]}) + df1 = pd.DataFrame( + {"city": ["Chicago", "San Francisco", "New York City"], "rank": range(1, 4)} + ) + df2 = pd.DataFrame( + {"city": ["Chicago", "Boston", "Los Angeles"], "rank": [1, 4, 5]} + ) .. code-block:: sql @@ -403,7 +406,7 @@ Top n rows with offset .. ipython:: python - tips.nlargest(10 + 5, columns='tip').tail(10) + tips.nlargest(10 + 5, columns="tip").tail(10) Top n rows per group ~~~~~~~~~~~~~~~~~~~~ @@ -423,20 +426,30 @@ Top n rows per group .. ipython:: python - (tips.assign(rn=tips.sort_values(['total_bill'], ascending=False) - .groupby(['day']) - .cumcount() + 1) - .query('rn < 3') - .sort_values(['day', 'rn'])) + ( + tips.assign( + rn=tips.sort_values(["total_bill"], ascending=False) + .groupby(["day"]) + .cumcount() + + 1 + ) + .query("rn < 3") + .sort_values(["day", "rn"]) + ) the same using ``rank(method='first')`` function .. ipython:: python - (tips.assign(rnk=tips.groupby(['day'])['total_bill'] - .rank(method='first', ascending=False)) - .query('rnk < 3') - .sort_values(['day', 'rnk'])) + ( + tips.assign( + rnk=tips.groupby(["day"])["total_bill"].rank( + method="first", ascending=False + ) + ) + .query("rnk < 3") + .sort_values(["day", "rnk"]) + ) .. code-block:: sql @@ -458,11 +471,12 @@ Notice that when using ``rank(method='min')`` function .. ipython:: python - (tips[tips['tip'] < 2] - .assign(rnk_min=tips.groupby(['sex'])['tip'] - .rank(method='min')) - .query('rnk_min < 3') - .sort_values(['sex', 'rnk_min'])) + ( + tips[tips["tip"] < 2] + .assign(rnk_min=tips.groupby(["sex"])["tip"].rank(method="min")) + .query("rnk_min < 3") + .sort_values(["sex", "rnk_min"]) + ) UPDATE @@ -476,7 +490,7 @@ UPDATE .. ipython:: python - tips.loc[tips['tip'] < 2, 'tip'] *= 2 + tips.loc[tips["tip"] < 2, "tip"] *= 2 DELETE ------ @@ -490,4 +504,4 @@ In pandas we select the rows that should remain, instead of deleting them .. ipython:: python - tips = tips.loc[tips['tip'] <= 9] + tips = tips.loc[tips["tip"] <= 9] diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst index 06f9e45466243..498be88453fc7 100644 --- a/doc/source/getting_started/comparison/comparison_with_stata.rst +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -103,7 +103,7 @@ and the values are the data. .. ipython:: python - df = pd.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]}) + df = pd.DataFrame({"x": [1, 3, 5], "y": [2, 4, 6]}) df @@ -127,8 +127,10 @@ the data set if presented with a url. .. ipython:: python - url = ('https://raw.github.com/pandas-dev' - '/pandas/master/pandas/tests/io/data/csv/tips.csv') + url = ( + "https://raw.github.com/pandas-dev" + "/pandas/master/pandas/tests/io/data/csv/tips.csv" + ) tips = pd.read_csv(url) tips.head() @@ -139,16 +141,16 @@ the pandas command would be: .. 
code-block:: python - tips = pd.read_csv('tips.csv', sep='\t', header=None) + tips = pd.read_csv("tips.csv", sep="\t", header=None) # alternatively, read_table is an alias to read_csv with tab delimiter - tips = pd.read_table('tips.csv', header=None) + tips = pd.read_table("tips.csv", header=None) Pandas can also read Stata data sets in ``.dta`` format with the :func:`read_stata` function. .. code-block:: python - df = pd.read_stata('data.dta') + df = pd.read_stata("data.dta") In addition to text/csv and Stata files, pandas supports a variety of other data formats such as Excel, SAS, HDF5, Parquet, and SQL databases. These are all read via a ``pd.read_*`` @@ -168,13 +170,13 @@ Similarly in pandas, the opposite of ``read_csv`` is :meth:`DataFrame.to_csv`. .. code-block:: python - tips.to_csv('tips2.csv') + tips.to_csv("tips2.csv") Pandas can also export to Stata file format with the :meth:`DataFrame.to_stata` method. .. code-block:: python - tips.to_stata('tips2.dta') + tips.to_stata("tips2.dta") Data operations @@ -200,11 +202,11 @@ drops a column from the ``DataFrame``. .. ipython:: python - tips['total_bill'] = tips['total_bill'] - 2 - tips['new_bill'] = tips['total_bill'] / 2 + tips["total_bill"] = tips["total_bill"] - 2 + tips["new_bill"] = tips["total_bill"] / 2 tips.head() - tips = tips.drop('new_bill', axis=1) + tips = tips.drop("new_bill", axis=1) Filtering ~~~~~~~~~ @@ -220,7 +222,7 @@ DataFrames can be filtered in multiple ways; the most intuitive of which is usin .. ipython:: python - tips[tips['total_bill'] > 10].head() + tips[tips["total_bill"] > 10].head() If/then logic ~~~~~~~~~~~~~ @@ -237,13 +239,13 @@ the ``where`` method from ``numpy``. .. ipython:: python - tips['bucket'] = np.where(tips['total_bill'] < 10, 'low', 'high') + tips["bucket"] = np.where(tips["total_bill"] < 10, "low", "high") tips.head() .. ipython:: python :suppress: - tips = tips.drop('bucket', axis=1) + tips = tips.drop("bucket", axis=1) Date functionality ~~~~~~~~~~~~~~~~~~ @@ -273,22 +275,26 @@ see the :ref:`timeseries documentation` for more details. .. ipython:: python - tips['date1'] = pd.Timestamp('2013-01-15') - tips['date2'] = pd.Timestamp('2015-02-15') - tips['date1_year'] = tips['date1'].dt.year - tips['date2_month'] = tips['date2'].dt.month - tips['date1_next'] = tips['date1'] + pd.offsets.MonthBegin() - tips['months_between'] = (tips['date2'].dt.to_period('M') - - tips['date1'].dt.to_period('M')) + tips["date1"] = pd.Timestamp("2013-01-15") + tips["date2"] = pd.Timestamp("2015-02-15") + tips["date1_year"] = tips["date1"].dt.year + tips["date2_month"] = tips["date2"].dt.month + tips["date1_next"] = tips["date1"] + pd.offsets.MonthBegin() + tips["months_between"] = tips["date2"].dt.to_period("M") - tips[ + "date1" + ].dt.to_period("M") - tips[['date1', 'date2', 'date1_year', 'date2_month', 'date1_next', - 'months_between']].head() + tips[ + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] + ].head() .. ipython:: python :suppress: - tips = tips.drop(['date1', 'date2', 'date1_year', 'date2_month', - 'date1_next', 'months_between'], axis=1) + tips = tips.drop( + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], + axis=1, + ) Selection of columns ~~~~~~~~~~~~~~~~~~~~ @@ -310,13 +316,13 @@ to a variable. .. 
ipython:: python # keep - tips[['sex', 'total_bill', 'tip']].head() + tips[["sex", "total_bill", "tip"]].head() # drop - tips.drop('sex', axis=1).head() + tips.drop("sex", axis=1).head() # rename - tips.rename(columns={'total_bill': 'total_bill_2'}).head() + tips.rename(columns={"total_bill": "total_bill_2"}).head() Sorting by values @@ -333,7 +339,7 @@ takes a list of columns to sort by. .. ipython:: python - tips = tips.sort_values(['sex', 'total_bill']) + tips = tips.sort_values(["sex", "total_bill"]) tips.head() @@ -357,8 +363,8 @@ Use ``len`` and ``rstrip`` to exclude trailing blanks. .. ipython:: python - tips['time'].str.len().head() - tips['time'].str.rstrip().str.len().head() + tips["time"].str.len().head() + tips["time"].str.rstrip().str.len().head() Finding position of substring @@ -380,7 +386,7 @@ the function will return -1 if it fails to find the substring. .. ipython:: python - tips['sex'].str.find("ale").head() + tips["sex"].str.find("ale").head() Extracting substring by position @@ -398,7 +404,7 @@ indexes are zero-based. .. ipython:: python - tips['sex'].str[0:1].head() + tips["sex"].str[0:1].head() Extracting nth word @@ -425,9 +431,9 @@ approaches, but this just shows a simple approach. .. ipython:: python - firstlast = pd.DataFrame({'string': ['John Smith', 'Jane Cook']}) - firstlast['First_Name'] = firstlast['string'].str.split(" ", expand=True)[0] - firstlast['Last_Name'] = firstlast['string'].str.rsplit(" ", expand=True)[0] + firstlast = pd.DataFrame({"string": ["John Smith", "Jane Cook"]}) + firstlast["First_Name"] = firstlast["string"].str.split(" ", expand=True)[0] + firstlast["Last_Name"] = firstlast["string"].str.rsplit(" ", expand=True)[0] firstlast @@ -455,10 +461,10 @@ The equivalent Python functions are ``upper``, ``lower``, and ``title``. .. ipython:: python - firstlast = pd.DataFrame({'string': ['John Smith', 'Jane Cook']}) - firstlast['upper'] = firstlast['string'].str.upper() - firstlast['lower'] = firstlast['string'].str.lower() - firstlast['title'] = firstlast['string'].str.title() + firstlast = pd.DataFrame({"string": ["John Smith", "Jane Cook"]}) + firstlast["upper"] = firstlast["string"].str.upper() + firstlast["lower"] = firstlast["string"].str.lower() + firstlast["title"] = firstlast["string"].str.title() firstlast Merging @@ -468,11 +474,9 @@ The following tables will be used in the merge examples .. ipython:: python - df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], - 'value': np.random.randn(4)}) + df1 = pd.DataFrame({"key": ["A", "B", "C", "D"], "value": np.random.randn(4)}) df1 - df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], - 'value': np.random.randn(4)}) + df2 = pd.DataFrame({"key": ["B", "D", "D", "E"], "value": np.random.randn(4)}) df2 In Stata, to perform a merge, one data set must be in memory @@ -534,16 +538,16 @@ types are accomplished via the ``how`` keyword. .. ipython:: python - inner_join = df1.merge(df2, on=['key'], how='inner') + inner_join = df1.merge(df2, on=["key"], how="inner") inner_join - left_join = df1.merge(df2, on=['key'], how='left') + left_join = df1.merge(df2, on=["key"], how="left") left_join - right_join = df1.merge(df2, on=['key'], how='right') + right_join = df1.merge(df2, on=["key"], how="right") right_join - outer_join = df1.merge(df2, on=['key'], how='outer') + outer_join = df1.merge(df2, on=["key"], how="outer") outer_join @@ -558,8 +562,8 @@ operations, and is ignored by default for aggregations. .. 
ipython:: python outer_join - outer_join['value_x'] + outer_join['value_y'] - outer_join['value_x'].sum() + outer_join["value_x"] + outer_join["value_y"] + outer_join["value_x"].sum() One difference is that missing data cannot be compared to its sentinel value. For example, in Stata you could do this to filter missing values. @@ -576,8 +580,8 @@ should be used for comparisons. .. ipython:: python - outer_join[pd.isna(outer_join['value_x'])] - outer_join[pd.notna(outer_join['value_x'])] + outer_join[pd.isna(outer_join["value_x"])] + outer_join[pd.notna(outer_join["value_x"])] Pandas also provides a variety of methods to work with missing data -- some of which would be challenging to express in Stata. For example, there are methods to @@ -591,10 +595,10 @@ value, like the mean, or forward filling from previous rows. See the outer_join.dropna() # Fill forwards - outer_join.fillna(method='ffill') + outer_join.fillna(method="ffill") # Impute missing values with the mean - outer_join['value_x'].fillna(outer_join['value_x'].mean()) + outer_join["value_x"].fillna(outer_join["value_x"].mean()) GroupBy @@ -617,7 +621,7 @@ for more details and examples. .. ipython:: python - tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum() + tips_summed = tips.groupby(["sex", "smoker"])[["total_bill", "tip"]].sum() tips_summed.head() @@ -640,8 +644,8 @@ operation. .. ipython:: python - gb = tips.groupby('smoker')['total_bill'] - tips['adj_total_bill'] = tips['total_bill'] - gb.transform('mean') + gb = tips.groupby("smoker")["total_bill"] + tips["adj_total_bill"] = tips["total_bill"] - gb.transform("mean") tips.head() @@ -661,7 +665,7 @@ In pandas this would be written as: .. ipython:: python - tips.groupby(['sex', 'smoker']).first() + tips.groupby(["sex", "smoker"]).first() Other considerations From ba99f249f75e80e4804a2dd9161f1e8a17a099e8 Mon Sep 17 00:00:00 2001 From: PrayagS Date: Sat, 3 Oct 2020 01:31:41 +0530 Subject: [PATCH 2/3] DOC: fix flake8-rst errors --- .../comparison/comparison_with_r.rst | 156 +++++++++--------- .../comparison/comparison_with_sas.rst | 20 +-- .../comparison/comparison_with_stata.rst | 10 +- 3 files changed, 93 insertions(+), 93 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst index 123b911a97aaa..358bb6ad951f0 100644 --- a/doc/source/getting_started/comparison/comparison_with_r.rst +++ b/doc/source/getting_started/comparison/comparison_with_r.rst @@ -160,29 +160,29 @@ function. .. ipython:: python df = pd.DataFrame( - { - "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], - "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], - "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], - "by2": [ - "wet", - "dry", - 99, - 95, - np.nan, - "damp", - 95, - 99, - "red", - 99, - np.nan, - np.nan, - ], - } - ) - - g = df.groupby(["by1", "by2"]) - g[["v1", "v2"]].mean() + { + "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], + "by2": [ + "wet", + "dry", + 99, + 95, + np.nan, + "damp", + 95, + 99, + "red", + 99, + np.nan, + np.nan, + ], + } + ) + + g = df.groupby(["by1", "by2"]) + g[["v1", "v2"]].mean() For more details and examples see :ref:`the groupby documentation `. 
@@ -243,14 +243,14 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this: import string baseball = pd.DataFrame( - { - "team": ["team %d" % (x + 1) for x in range(5)] * 5, - "player": random.sample(list(string.ascii_lowercase), 25), - "batting avg": np.random.uniform(0.200, 0.400, 25), - } - ) + { + "team": ["team %d" % (x + 1) for x in range(5)] * 5, + "player": random.sample(list(string.ascii_lowercase), 25), + "batting avg": np.random.uniform(0.200, 0.400, 25), + } + ) - baseball.pivot_table(values="batting avg", columns="team", aggfunc=np.max) + baseball.pivot_table(values="batting avg", columns="team", aggfunc=np.max) For more details and examples see :ref:`the reshaping documentation `. @@ -353,14 +353,14 @@ In ``pandas`` the equivalent expression, using the .. ipython:: python df = pd.DataFrame( - { - "x": np.random.uniform(1.0, 168.0, 120), - "y": np.random.uniform(7.0, 334.0, 120), - "z": np.random.uniform(1.7, 20.7, 120), - "month": [5, 6, 7, 8] * 30, - "week": np.random.randint(1, 4, 120), - } - ) + { + "x": np.random.uniform(1.0, 168.0, 120), + "y": np.random.uniform(7.0, 334.0, 120), + "z": np.random.uniform(1.7, 20.7, 120), + "month": [5, 6, 7, 8] * 30, + "week": np.random.randint(1, 4, 120), + } + ) grouped = df.groupby(["month", "week"]) grouped["x"].agg([np.mean, np.std]) @@ -433,13 +433,13 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent: .. ipython:: python cheese = pd.DataFrame( - { - "first": ["John", "Mary"], - "last": ["Doe", "Bo"], - "height": [5.5, 6.0], - "weight": [130, 150], - } - ) + { + "first": ["John", "Mary"], + "last": ["Doe", "Bo"], + "height": [5.5, 6.0], + "weight": [130, 150], + } + ) pd.melt(cheese, id_vars=["first", "last"]) cheese.set_index(["first", "last"]).stack() # alternative way @@ -471,23 +471,23 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`: .. ipython:: python df = pd.DataFrame( - { - "x": np.random.uniform(1.0, 168.0, 12), - "y": np.random.uniform(7.0, 334.0, 12), - "z": np.random.uniform(1.7, 20.7, 12), - "month": [5, 6, 7] * 4, - "week": [1, 2] * 6, - } - ) - - mdf = pd.melt(df, id_vars=["month", "week"]) - pd.pivot_table( - mdf, - values="value", - index=["variable", "week"], - columns=["month"], - aggfunc=np.mean, - ) + { + "x": np.random.uniform(1.0, 168.0, 12), + "y": np.random.uniform(7.0, 334.0, 12), + "z": np.random.uniform(1.7, 20.7, 12), + "month": [5, 6, 7] * 4, + "week": [1, 2] * 6, + } + ) + + mdf = pd.melt(df, id_vars=["month", "week"]) + pd.pivot_table( + mdf, + values="value", + index=["variable", "week"], + columns=["month"], + aggfunc=np.mean, + ) Similarly for ``dcast`` which uses a data.frame called ``df`` in R to aggregate information based on ``Animal`` and ``FeedType``: @@ -511,22 +511,22 @@ using :meth:`~pandas.pivot_table`: .. 
ipython:: python df = pd.DataFrame( - { - "Animal": [ - "Animal1", - "Animal2", - "Animal3", - "Animal2", - "Animal1", - "Animal2", - "Animal3", - ], - "FeedType": ["A", "B", "A", "A", "B", "B", "A"], - "Amount": [10, 7, 4, 2, 5, 6, 2], - } - ) - - df.pivot_table(values="Amount", index="Animal", columns="FeedType", aggfunc="sum") + { + "Animal": [ + "Animal1", + "Animal2", + "Animal3", + "Animal2", + "Animal1", + "Animal2", + "Animal3", + ], + "FeedType": ["A", "B", "A", "A", "B", "B", "A"], + "Amount": [10, 7, 4, 2, 5, 6, 2], + } + ) + + df.pivot_table(values="Amount", index="Animal", columns="FeedType", aggfunc="sum") The second approach is to use the :meth:`~pandas.DataFrame.groupby` method: diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst index 381558b1359f7..ae9f1caebd556 100644 --- a/doc/source/getting_started/comparison/comparison_with_sas.rst +++ b/doc/source/getting_started/comparison/comparison_with_sas.rst @@ -131,11 +131,11 @@ The pandas method is :func:`read_csv`, which works similarly. .. ipython:: python url = ( - "https://raw.github.com/pandas-dev/" - "pandas/master/pandas/tests/io/data/csv/tips.csv" - ) - tips = pd.read_csv(url) - tips.head() + "https://raw.github.com/pandas-dev/" + "pandas/master/pandas/tests/io/data/csv/tips.csv" + ) + tips = pd.read_csv(url) + tips.head() Like ``PROC IMPORT``, ``read_csv`` can take a number of parameters to specify @@ -292,20 +292,20 @@ see the :ref:`timeseries documentation` for more details. tips["date2_month"] = tips["date2"].dt.month tips["date1_next"] = tips["date1"] + pd.offsets.MonthBegin() tips["months_between"] = tips["date2"].dt.to_period("M") - tips[ - "date1" + "date1" ].dt.to_period("M") tips[ - ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] ].head() .. ipython:: python :suppress: tips = tips.drop( - ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], - axis=1, - ) + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], + axis=1, + ) Selection of columns ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst index 498be88453fc7..9016b55aacb58 100644 --- a/doc/source/getting_started/comparison/comparison_with_stata.rst +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -128,8 +128,8 @@ the data set if presented with a url. .. ipython:: python url = ( - "https://raw.github.com/pandas-dev" - "/pandas/master/pandas/tests/io/data/csv/tips.csv" + "https://raw.github.com/pandas-dev" + "/pandas/master/pandas/tests/io/data/csv/tips.csv" ) tips = pd.read_csv(url) tips.head() @@ -281,18 +281,18 @@ see the :ref:`timeseries documentation` for more details. tips["date2_month"] = tips["date2"].dt.month tips["date1_next"] = tips["date1"] + pd.offsets.MonthBegin() tips["months_between"] = tips["date2"].dt.to_period("M") - tips[ - "date1" + "date1" ].dt.to_period("M") tips[ - ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] ].head() .. 
ipython:: python :suppress: tips = tips.drop( - ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], axis=1, ) From 569a71aca2d81392cd274bac0877ef930f2f78fa Mon Sep 17 00:00:00 2001 From: PrayagS Date: Sat, 3 Oct 2020 01:48:00 +0530 Subject: [PATCH 3/3] DOC: fix E131 in comparison_with_stata.rst --- doc/source/getting_started/comparison/comparison_with_stata.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst index 9016b55aacb58..7b8d9c6be61db 100644 --- a/doc/source/getting_started/comparison/comparison_with_stata.rst +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -293,7 +293,7 @@ see the :ref:`timeseries documentation` for more details. tips = tips.drop( ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], - axis=1, + axis=1, ) Selection of columns
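
The patches above mechanically requote and re-indent the existing documentation examples to match black's output; no example's behavior changes. As a minimal sketch — not part of the patch series — the block below shows one way to verify that an individual snippet is already black-formatted, assuming the ``black`` package is importable and using its public ``format_file_contents`` API; the example snippet string is illustrative.

.. code-block:: python

    import black

    # An illustrative snippet in the double-quoted style these patches adopt.
    snippet = 'df = pd.DataFrame({"x": [1, 3, 5], "y": [2, 4, 6]})\n'

    try:
        # format_file_contents raises black.NothingChanged when the input
        # already matches what black would produce.
        black.format_file_contents(snippet, fast=False, mode=black.FileMode())
        print("snippet was not black-formatted")
    except black.NothingChanged:
        print("snippet is already black-formatted")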