From 4f60f72e8b75b512ca6e5c214789816051178265 Mon Sep 17 00:00:00 2001 From: PrayagS Date: Sat, 3 Oct 2020 00:42:59 +0530 Subject: [PATCH 1/3] DOC: use black to fix code style in doc pandas-dev#36777 --- .../comparison/comparison_with_r.rst | 153 +++++++++++------- .../comparison/comparison_with_sas.rst | 130 +++++++-------- .../comparison/comparison_with_sql.rst | 110 +++++++------ .../comparison/comparison_with_stata.rst | 120 +++++++------- 4 files changed, 289 insertions(+), 224 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst index e1a4cfe49b7d1..123b911a97aaa 100644 --- a/doc/source/getting_started/comparison/comparison_with_r.rst +++ b/doc/source/getting_started/comparison/comparison_with_r.rst @@ -122,16 +122,16 @@ Selecting multiple columns by name in ``pandas`` is straightforward .. ipython:: python - df = pd.DataFrame(np.random.randn(10, 3), columns=list('abc')) - df[['a', 'c']] - df.loc[:, ['a', 'c']] + df = pd.DataFrame(np.random.randn(10, 3), columns=list("abc")) + df[["a", "c"]] + df.loc[:, ["a", "c"]] Selecting multiple noncontiguous columns by integer location can be achieved with a combination of the ``iloc`` indexer attribute and ``numpy.r_``. .. ipython:: python - named = list('abcdefg') + named = list("abcdefg") n = 30 columns = named + np.arange(len(named), n).tolist() df = pd.DataFrame(np.random.randn(n, n), columns=columns) @@ -160,14 +160,29 @@ function. .. ipython:: python df = pd.DataFrame( - {'v1': [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], - 'v2': [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], - 'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], - 'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, np.nan, - np.nan]}) - - g = df.groupby(['by1', 'by2']) - g[['v1', 'v2']].mean() + { + "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], + "by2": [ + "wet", + "dry", + 99, + 95, + np.nan, + "damp", + 95, + 99, + "red", + 99, + np.nan, + np.nan, + ], + } + ) + + g = df.groupby(["by1", "by2"]) + g[["v1", "v2"]].mean() For more details and examples see :ref:`the groupby documentation `. @@ -228,11 +243,14 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this: import string baseball = pd.DataFrame( - {'team': ["team %d" % (x + 1) for x in range(5)] * 5, - 'player': random.sample(list(string.ascii_lowercase), 25), - 'batting avg': np.random.uniform(.200, .400, 25)}) + { + "team": ["team %d" % (x + 1) for x in range(5)] * 5, + "player": random.sample(list(string.ascii_lowercase), 25), + "batting avg": np.random.uniform(0.200, 0.400, 25), + } + ) - baseball.pivot_table(values='batting avg', columns='team', aggfunc=np.max) + baseball.pivot_table(values="batting avg", columns="team", aggfunc=np.max) For more details and examples see :ref:`the reshaping documentation `. @@ -256,10 +274,10 @@ index/slice as well as standard boolean indexing: .. ipython:: python - df = pd.DataFrame({'a': np.random.randn(10), 'b': np.random.randn(10)}) - df.query('a <= b') - df[df['a'] <= df['b']] - df.loc[df['a'] <= df['b']] + df = pd.DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.query("a <= b") + df[df["a"] <= df["b"]] + df.loc[df["a"] <= df["b"]] For more details and examples see :ref:`the query documentation `. 
@@ -282,9 +300,9 @@ In ``pandas`` the equivalent expression, using the .. ipython:: python - df = pd.DataFrame({'a': np.random.randn(10), 'b': np.random.randn(10)}) - df.eval('a + b') - df['a'] + df['b'] # same as the previous expression + df = pd.DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.eval("a + b") + df["a"] + df["b"] # same as the previous expression In certain cases :meth:`~pandas.DataFrame.eval` will be much faster than evaluation in pure Python. For more details and examples see :ref:`the eval @@ -334,14 +352,18 @@ In ``pandas`` the equivalent expression, using the .. ipython:: python - df = pd.DataFrame({'x': np.random.uniform(1., 168., 120), - 'y': np.random.uniform(7., 334., 120), - 'z': np.random.uniform(1.7, 20.7, 120), - 'month': [5, 6, 7, 8] * 30, - 'week': np.random.randint(1, 4, 120)}) + df = pd.DataFrame( + { + "x": np.random.uniform(1.0, 168.0, 120), + "y": np.random.uniform(7.0, 334.0, 120), + "z": np.random.uniform(1.7, 20.7, 120), + "month": [5, 6, 7, 8] * 30, + "week": np.random.randint(1, 4, 120), + } + ) - grouped = df.groupby(['month', 'week']) - grouped['x'].agg([np.mean, np.std]) + grouped = df.groupby(["month", "week"]) + grouped["x"].agg([np.mean, np.std]) For more details and examples see :ref:`the groupby documentation @@ -410,13 +432,17 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent: .. ipython:: python - cheese = pd.DataFrame({'first': ['John', 'Mary'], - 'last': ['Doe', 'Bo'], - 'height': [5.5, 6.0], - 'weight': [130, 150]}) + cheese = pd.DataFrame( + { + "first": ["John", "Mary"], + "last": ["Doe", "Bo"], + "height": [5.5, 6.0], + "weight": [130, 150], + } + ) - pd.melt(cheese, id_vars=['first', 'last']) - cheese.set_index(['first', 'last']).stack() # alternative way + pd.melt(cheese, id_vars=["first", "last"]) + cheese.set_index(["first", "last"]).stack() # alternative way For more details and examples see :ref:`the reshaping documentation `. @@ -444,15 +470,24 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`: .. ipython:: python - df = pd.DataFrame({'x': np.random.uniform(1., 168., 12), - 'y': np.random.uniform(7., 334., 12), - 'z': np.random.uniform(1.7, 20.7, 12), - 'month': [5, 6, 7] * 4, - 'week': [1, 2] * 6}) - - mdf = pd.melt(df, id_vars=['month', 'week']) - pd.pivot_table(mdf, values='value', index=['variable', 'week'], - columns=['month'], aggfunc=np.mean) + df = pd.DataFrame( + { + "x": np.random.uniform(1.0, 168.0, 12), + "y": np.random.uniform(7.0, 334.0, 12), + "z": np.random.uniform(1.7, 20.7, 12), + "month": [5, 6, 7] * 4, + "week": [1, 2] * 6, + } + ) + + mdf = pd.melt(df, id_vars=["month", "week"]) + pd.pivot_table( + mdf, + values="value", + index=["variable", "week"], + columns=["month"], + aggfunc=np.mean, + ) Similarly for ``dcast`` which uses a data.frame called ``df`` in R to aggregate information based on ``Animal`` and ``FeedType``: @@ -475,21 +510,29 @@ using :meth:`~pandas.pivot_table`: .. 
ipython:: python - df = pd.DataFrame({ - 'Animal': ['Animal1', 'Animal2', 'Animal3', 'Animal2', 'Animal1', - 'Animal2', 'Animal3'], - 'FeedType': ['A', 'B', 'A', 'A', 'B', 'B', 'A'], - 'Amount': [10, 7, 4, 2, 5, 6, 2], - }) - - df.pivot_table(values='Amount', index='Animal', columns='FeedType', - aggfunc='sum') + df = pd.DataFrame( + { + "Animal": [ + "Animal1", + "Animal2", + "Animal3", + "Animal2", + "Animal1", + "Animal2", + "Animal3", + ], + "FeedType": ["A", "B", "A", "A", "B", "B", "A"], + "Amount": [10, 7, 4, 2, 5, 6, 2], + } + ) + + df.pivot_table(values="Amount", index="Animal", columns="FeedType", aggfunc="sum") The second approach is to use the :meth:`~pandas.DataFrame.groupby` method: .. ipython:: python - df.groupby(['Animal', 'FeedType'])['Amount'].sum() + df.groupby(["Animal", "FeedType"])["Amount"].sum() For more details and examples see :ref:`the reshaping documentation ` or :ref:`the groupby documentation`. diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst index 85c6ea2c31969..381558b1359f7 100644 --- a/doc/source/getting_started/comparison/comparison_with_sas.rst +++ b/doc/source/getting_started/comparison/comparison_with_sas.rst @@ -106,7 +106,7 @@ and the values are the data. .. ipython:: python - df = pd.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]}) + df = pd.DataFrame({"x": [1, 3, 5], "y": [2, 4, 6]}) df @@ -130,10 +130,12 @@ The pandas method is :func:`read_csv`, which works similarly. .. ipython:: python - url = ('https://raw.github.com/pandas-dev/' - 'pandas/master/pandas/tests/io/data/csv/tips.csv') - tips = pd.read_csv(url) - tips.head() + url = ( + "https://raw.github.com/pandas-dev/" + "pandas/master/pandas/tests/io/data/csv/tips.csv" + ) + tips = pd.read_csv(url) + tips.head() Like ``PROC IMPORT``, ``read_csv`` can take a number of parameters to specify @@ -142,10 +144,10 @@ and did not have column names, the pandas command would be: .. code-block:: python - tips = pd.read_csv('tips.csv', sep='\t', header=None) + tips = pd.read_csv("tips.csv", sep="\t", header=None) # alternatively, read_table is an alias to read_csv with tab delimiter - tips = pd.read_table('tips.csv', header=None) + tips = pd.read_table("tips.csv", header=None) In addition to text/csv, pandas supports a variety of other data formats such as Excel, HDF5, and SQL databases. These are all read via a ``pd.read_*`` @@ -166,7 +168,7 @@ and other data formats follow a similar api. .. code-block:: python - tips.to_csv('tips2.csv') + tips.to_csv("tips2.csv") Data operations @@ -192,14 +194,14 @@ New columns can be assigned in the same way. .. ipython:: python - tips['total_bill'] = tips['total_bill'] - 2 - tips['new_bill'] = tips['total_bill'] / 2.0 + tips["total_bill"] = tips["total_bill"] - 2 + tips["new_bill"] = tips["total_bill"] / 2.0 tips.head() .. ipython:: python :suppress: - tips = tips.drop('new_bill', axis=1) + tips = tips.drop("new_bill", axis=1) Filtering ~~~~~~~~~ @@ -226,7 +228,7 @@ DataFrames can be filtered in multiple ways; the most intuitive of which is usin .. ipython:: python - tips[tips['total_bill'] > 10].head() + tips[tips["total_bill"] > 10].head() If/then logic ~~~~~~~~~~~~~ @@ -248,13 +250,13 @@ the ``where`` method from ``numpy``. .. ipython:: python - tips['bucket'] = np.where(tips['total_bill'] < 10, 'low', 'high') + tips["bucket"] = np.where(tips["total_bill"] < 10, "low", "high") tips.head() .. 
ipython:: python :suppress: - tips = tips.drop('bucket', axis=1) + tips = tips.drop("bucket", axis=1) Date functionality ~~~~~~~~~~~~~~~~~~ @@ -284,22 +286,26 @@ see the :ref:`timeseries documentation` for more details. .. ipython:: python - tips['date1'] = pd.Timestamp('2013-01-15') - tips['date2'] = pd.Timestamp('2015-02-15') - tips['date1_year'] = tips['date1'].dt.year - tips['date2_month'] = tips['date2'].dt.month - tips['date1_next'] = tips['date1'] + pd.offsets.MonthBegin() - tips['months_between'] = ( - tips['date2'].dt.to_period('M') - tips['date1'].dt.to_period('M')) + tips["date1"] = pd.Timestamp("2013-01-15") + tips["date2"] = pd.Timestamp("2015-02-15") + tips["date1_year"] = tips["date1"].dt.year + tips["date2_month"] = tips["date2"].dt.month + tips["date1_next"] = tips["date1"] + pd.offsets.MonthBegin() + tips["months_between"] = tips["date2"].dt.to_period("M") - tips[ + "date1" + ].dt.to_period("M") - tips[['date1', 'date2', 'date1_year', 'date2_month', - 'date1_next', 'months_between']].head() + tips[ + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] + ].head() .. ipython:: python :suppress: - tips = tips.drop(['date1', 'date2', 'date1_year', - 'date2_month', 'date1_next', 'months_between'], axis=1) + tips = tips.drop( + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], + axis=1, + ) Selection of columns ~~~~~~~~~~~~~~~~~~~~ @@ -329,13 +335,13 @@ The same operations are expressed in pandas below. .. ipython:: python # keep - tips[['sex', 'total_bill', 'tip']].head() + tips[["sex", "total_bill", "tip"]].head() # drop - tips.drop('sex', axis=1).head() + tips.drop("sex", axis=1).head() # rename - tips.rename(columns={'total_bill': 'total_bill_2'}).head() + tips.rename(columns={"total_bill": "total_bill_2"}).head() Sorting by values @@ -354,7 +360,7 @@ takes a list of columns to sort by. .. ipython:: python - tips = tips.sort_values(['sex', 'total_bill']) + tips = tips.sort_values(["sex", "total_bill"]) tips.head() @@ -383,8 +389,8 @@ trailing blanks. .. ipython:: python - tips['time'].str.len().head() - tips['time'].str.rstrip().str.len().head() + tips["time"].str.len().head() + tips["time"].str.rstrip().str.len().head() Find @@ -410,7 +416,7 @@ the function will return -1 if it fails to find the substring. .. ipython:: python - tips['sex'].str.find("ale").head() + tips["sex"].str.find("ale").head() Substring @@ -432,7 +438,7 @@ indexes are zero-based. .. ipython:: python - tips['sex'].str[0:1].head() + tips["sex"].str[0:1].head() Scan @@ -460,9 +466,9 @@ approaches, but this just shows a simple approach. .. ipython:: python - firstlast = pd.DataFrame({'String': ['John Smith', 'Jane Cook']}) - firstlast['First_Name'] = firstlast['String'].str.split(" ", expand=True)[0] - firstlast['Last_Name'] = firstlast['String'].str.rsplit(" ", expand=True)[0] + firstlast = pd.DataFrame({"String": ["John Smith", "Jane Cook"]}) + firstlast["First_Name"] = firstlast["String"].str.split(" ", expand=True)[0] + firstlast["Last_Name"] = firstlast["String"].str.rsplit(" ", expand=True)[0] firstlast @@ -491,10 +497,10 @@ The equivalent Python functions are ``upper``, ``lower``, and ``title``. .. 
ipython:: python - firstlast = pd.DataFrame({'String': ['John Smith', 'Jane Cook']}) - firstlast['string_up'] = firstlast['String'].str.upper() - firstlast['string_low'] = firstlast['String'].str.lower() - firstlast['string_prop'] = firstlast['String'].str.title() + firstlast = pd.DataFrame({"String": ["John Smith", "Jane Cook"]}) + firstlast["string_up"] = firstlast["String"].str.upper() + firstlast["string_low"] = firstlast["String"].str.lower() + firstlast["string_prop"] = firstlast["String"].str.title() firstlast Merging @@ -504,11 +510,9 @@ The following tables will be used in the merge examples .. ipython:: python - df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], - 'value': np.random.randn(4)}) + df1 = pd.DataFrame({"key": ["A", "B", "C", "D"], "value": np.random.randn(4)}) df1 - df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], - 'value': np.random.randn(4)}) + df2 = pd.DataFrame({"key": ["B", "D", "D", "E"], "value": np.random.randn(4)}) df2 In SAS, data must be explicitly sorted before merging. Different @@ -542,16 +546,16 @@ types are accomplished via the ``how`` keyword. .. ipython:: python - inner_join = df1.merge(df2, on=['key'], how='inner') + inner_join = df1.merge(df2, on=["key"], how="inner") inner_join - left_join = df1.merge(df2, on=['key'], how='left') + left_join = df1.merge(df2, on=["key"], how="left") left_join - right_join = df1.merge(df2, on=['key'], how='right') + right_join = df1.merge(df2, on=["key"], how="right") right_join - outer_join = df1.merge(df2, on=['key'], how='outer') + outer_join = df1.merge(df2, on=["key"], how="outer") outer_join @@ -566,8 +570,8 @@ operations, and is ignored by default for aggregations. .. ipython:: python outer_join - outer_join['value_x'] + outer_join['value_y'] - outer_join['value_x'].sum() + outer_join["value_x"] + outer_join["value_y"] + outer_join["value_x"].sum() One difference is that missing data cannot be compared to its sentinel value. For example, in SAS you could do this to filter missing values. @@ -589,8 +593,8 @@ should be used for comparisons. .. ipython:: python - outer_join[pd.isna(outer_join['value_x'])] - outer_join[pd.notna(outer_join['value_x'])] + outer_join[pd.isna(outer_join["value_x"])] + outer_join[pd.notna(outer_join["value_x"])] pandas also provides a variety of methods to work with missing data - some of which would be challenging to express in SAS. For example, there are methods to @@ -601,8 +605,8 @@ value, like the mean, or forward filling from previous rows. See the .. ipython:: python outer_join.dropna() - outer_join.fillna(method='ffill') - outer_join['value_x'].fillna(outer_join['value_x'].mean()) + outer_join.fillna(method="ffill") + outer_join["value_x"].fillna(outer_join["value_x"].mean()) GroupBy @@ -629,7 +633,7 @@ for more details and examples. .. ipython:: python - tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum() + tips_summed = tips.groupby(["sex", "smoker"])[["total_bill", "tip"]].sum() tips_summed.head() @@ -666,8 +670,8 @@ operation. .. ipython:: python - gb = tips.groupby('smoker')['total_bill'] - tips['adj_total_bill'] = tips['total_bill'] - gb.transform('mean') + gb = tips.groupby("smoker")["total_bill"] + tips["adj_total_bill"] = tips["total_bill"] - gb.transform("mean") tips.head() @@ -695,7 +699,7 @@ In pandas this would be written as: .. ipython:: python - tips.groupby(['sex', 'smoker']).first() + tips.groupby(["sex", "smoker"]).first() Other considerations @@ -729,16 +733,16 @@ the XPORT or SAS7BDAT binary format. .. 
code-block:: python - df = pd.read_sas('transport-file.xpt') - df = pd.read_sas('binary-file.sas7bdat') + df = pd.read_sas("transport-file.xpt") + df = pd.read_sas("binary-file.sas7bdat") You can also specify the file format directly. By default, pandas will try to infer the file format based on its extension. .. code-block:: python - df = pd.read_sas('transport-file.xpt', format='xport') - df = pd.read_sas('binary-file.sas7bdat', format='sas7bdat') + df = pd.read_sas("transport-file.xpt", format="xport") + df = pd.read_sas("binary-file.sas7bdat", format="sas7bdat") XPORT is a relatively limited format and the parsing of it is not as optimized as some of the other pandas readers. An alternative way @@ -752,4 +756,4 @@ to interop data between SAS and pandas is to serialize to csv. Wall time: 14.6 s In [9]: %time df = pd.read_csv('big.csv') - Wall time: 4.86 s \ No newline at end of file + Wall time: 4.86 s diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index 04f97a27cde39..6848d8df2e46b 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -24,8 +24,10 @@ structure. .. ipython:: python - url = ('https://raw.github.com/pandas-dev' - '/pandas/master/pandas/tests/io/data/csv/tips.csv') + url = ( + "https://raw.github.com/pandas-dev" + "/pandas/master/pandas/tests/io/data/csv/tips.csv" + ) tips = pd.read_csv(url) tips.head() @@ -44,7 +46,7 @@ With pandas, column selection is done by passing a list of column names to your .. ipython:: python - tips[['total_bill', 'tip', 'smoker', 'time']].head(5) + tips[["total_bill", "tip", "smoker", "time"]].head(5) Calling the DataFrame without the list of column names would display all columns (akin to SQL's ``*``). @@ -61,7 +63,7 @@ With pandas, you can use the :meth:`DataFrame.assign` method of a DataFrame to a .. ipython:: python - tips.assign(tip_rate=tips['tip'] / tips['total_bill']).head(5) + tips.assign(tip_rate=tips["tip"] / tips["total_bill"]).head(5) WHERE ----- @@ -79,14 +81,14 @@ DataFrames can be filtered in multiple ways; the most intuitive of which is usin .. ipython:: python - tips[tips['time'] == 'Dinner'].head(5) + tips[tips["time"] == "Dinner"].head(5) The above statement is simply passing a ``Series`` of True/False objects to the DataFrame, returning all rows with True. .. ipython:: python - is_dinner = tips['time'] == 'Dinner' + is_dinner = tips["time"] == "Dinner" is_dinner.value_counts() tips[is_dinner].head(5) @@ -103,7 +105,7 @@ Just like SQL's OR and AND, multiple conditions can be passed to a DataFrame usi .. ipython:: python # tips of more than $5.00 at Dinner meals - tips[(tips['time'] == 'Dinner') & (tips['tip'] > 5.00)] + tips[(tips["time"] == "Dinner") & (tips["tip"] > 5.00)] .. code-block:: sql @@ -115,15 +117,16 @@ Just like SQL's OR and AND, multiple conditions can be passed to a DataFrame usi .. ipython:: python # tips by parties of at least 5 diners OR bill total was more than $45 - tips[(tips['size'] >= 5) | (tips['total_bill'] > 45)] + tips[(tips["size"] >= 5) | (tips["total_bill"] > 45)] NULL checking is done using the :meth:`~pandas.Series.notna` and :meth:`~pandas.Series.isna` methods. .. 
ipython:: python - frame = pd.DataFrame({'col1': ['A', 'B', np.NaN, 'C', 'D'], - 'col2': ['F', np.NaN, 'G', 'H', 'I']}) + frame = pd.DataFrame( + {"col1": ["A", "B", np.NaN, "C", "D"], "col2": ["F", np.NaN, "G", "H", "I"]} + ) frame Assume we have a table of the same structure as our DataFrame above. We can see only the records @@ -137,7 +140,7 @@ where ``col2`` IS NULL with the following query: .. ipython:: python - frame[frame['col2'].isna()] + frame[frame["col2"].isna()] Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.notna`. @@ -149,7 +152,7 @@ Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series. .. ipython:: python - frame[frame['col1'].notna()] + frame[frame["col1"].notna()] GROUP BY @@ -177,7 +180,7 @@ The pandas equivalent would be: .. ipython:: python - tips.groupby('sex').size() + tips.groupby("sex").size() Notice that in the pandas code we used :meth:`~pandas.core.groupby.DataFrameGroupBy.size` and not :meth:`~pandas.core.groupby.DataFrameGroupBy.count`. This is because @@ -186,14 +189,14 @@ the number of ``not null`` records within each. .. ipython:: python - tips.groupby('sex').count() + tips.groupby("sex").count() Alternatively, we could have applied the :meth:`~pandas.core.groupby.DataFrameGroupBy.count` method to an individual column: .. ipython:: python - tips.groupby('sex')['total_bill'].count() + tips.groupby("sex")["total_bill"].count() Multiple functions can also be applied at once. For instance, say we'd like to see how tip amount differs by day of the week - :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` allows you to pass a dictionary @@ -213,7 +216,7 @@ to your grouped DataFrame, indicating which functions to apply to specific colum .. ipython:: python - tips.groupby('day').agg({'tip': np.mean, 'day': np.size}) + tips.groupby("day").agg({"tip": np.mean, "day": np.size}) Grouping by more than one column is done by passing a list of columns to the :meth:`~pandas.DataFrame.groupby` method. @@ -237,7 +240,7 @@ Grouping by more than one column is done by passing a list of columns to the .. ipython:: python - tips.groupby(['smoker', 'day']).agg({'tip': [np.size, np.mean]}) + tips.groupby(["smoker", "day"]).agg({"tip": [np.size, np.mean]}) .. _compare_with_sql.join: @@ -250,10 +253,8 @@ columns to join on (column names or indices). .. ipython:: python - df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], - 'value': np.random.randn(4)}) - df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], - 'value': np.random.randn(4)}) + df1 = pd.DataFrame({"key": ["A", "B", "C", "D"], "value": np.random.randn(4)}) + df2 = pd.DataFrame({"key": ["B", "D", "D", "E"], "value": np.random.randn(4)}) Assume we have two database tables of the same name and structure as our DataFrames. @@ -271,15 +272,15 @@ INNER JOIN .. ipython:: python # merge performs an INNER JOIN by default - pd.merge(df1, df2, on='key') + pd.merge(df1, df2, on="key") :meth:`~pandas.merge` also offers parameters for cases when you'd like to join one DataFrame's column with another DataFrame's index. .. ipython:: python - indexed_df2 = df2.set_index('key') - pd.merge(df1, indexed_df2, left_on='key', right_index=True) + indexed_df2 = df2.set_index("key") + pd.merge(df1, indexed_df2, left_on="key", right_index=True) LEFT OUTER JOIN ~~~~~~~~~~~~~~~ @@ -294,7 +295,7 @@ LEFT OUTER JOIN .. ipython:: python # show all records from df1 - pd.merge(df1, df2, on='key', how='left') + pd.merge(df1, df2, on="key", how="left") RIGHT JOIN ~~~~~~~~~~ @@ -309,7 +310,7 @@ RIGHT JOIN .. 
ipython:: python # show all records from df2 - pd.merge(df1, df2, on='key', how='right') + pd.merge(df1, df2, on="key", how="right") FULL JOIN ~~~~~~~~~ @@ -327,7 +328,7 @@ joined columns find a match. As of writing, FULL JOINs are not supported in all .. ipython:: python # show all records from both frames - pd.merge(df1, df2, on='key', how='outer') + pd.merge(df1, df2, on="key", how="outer") UNION @@ -336,10 +337,12 @@ UNION ALL can be performed using :meth:`~pandas.concat`. .. ipython:: python - df1 = pd.DataFrame({'city': ['Chicago', 'San Francisco', 'New York City'], - 'rank': range(1, 4)}) - df2 = pd.DataFrame({'city': ['Chicago', 'Boston', 'Los Angeles'], - 'rank': [1, 4, 5]}) + df1 = pd.DataFrame( + {"city": ["Chicago", "San Francisco", "New York City"], "rank": range(1, 4)} + ) + df2 = pd.DataFrame( + {"city": ["Chicago", "Boston", "Los Angeles"], "rank": [1, 4, 5]} + ) .. code-block:: sql @@ -403,7 +406,7 @@ Top n rows with offset .. ipython:: python - tips.nlargest(10 + 5, columns='tip').tail(10) + tips.nlargest(10 + 5, columns="tip").tail(10) Top n rows per group ~~~~~~~~~~~~~~~~~~~~ @@ -423,20 +426,30 @@ Top n rows per group .. ipython:: python - (tips.assign(rn=tips.sort_values(['total_bill'], ascending=False) - .groupby(['day']) - .cumcount() + 1) - .query('rn < 3') - .sort_values(['day', 'rn'])) + ( + tips.assign( + rn=tips.sort_values(["total_bill"], ascending=False) + .groupby(["day"]) + .cumcount() + + 1 + ) + .query("rn < 3") + .sort_values(["day", "rn"]) + ) the same using ``rank(method='first')`` function .. ipython:: python - (tips.assign(rnk=tips.groupby(['day'])['total_bill'] - .rank(method='first', ascending=False)) - .query('rnk < 3') - .sort_values(['day', 'rnk'])) + ( + tips.assign( + rnk=tips.groupby(["day"])["total_bill"].rank( + method="first", ascending=False + ) + ) + .query("rnk < 3") + .sort_values(["day", "rnk"]) + ) .. code-block:: sql @@ -458,11 +471,12 @@ Notice that when using ``rank(method='min')`` function .. ipython:: python - (tips[tips['tip'] < 2] - .assign(rnk_min=tips.groupby(['sex'])['tip'] - .rank(method='min')) - .query('rnk_min < 3') - .sort_values(['sex', 'rnk_min'])) + ( + tips[tips["tip"] < 2] + .assign(rnk_min=tips.groupby(["sex"])["tip"].rank(method="min")) + .query("rnk_min < 3") + .sort_values(["sex", "rnk_min"]) + ) UPDATE @@ -476,7 +490,7 @@ UPDATE .. ipython:: python - tips.loc[tips['tip'] < 2, 'tip'] *= 2 + tips.loc[tips["tip"] < 2, "tip"] *= 2 DELETE ------ @@ -490,4 +504,4 @@ In pandas we select the rows that should remain, instead of deleting them .. ipython:: python - tips = tips.loc[tips['tip'] <= 9] + tips = tips.loc[tips["tip"] <= 9] diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst index 06f9e45466243..498be88453fc7 100644 --- a/doc/source/getting_started/comparison/comparison_with_stata.rst +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -103,7 +103,7 @@ and the values are the data. .. ipython:: python - df = pd.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]}) + df = pd.DataFrame({"x": [1, 3, 5], "y": [2, 4, 6]}) df @@ -127,8 +127,10 @@ the data set if presented with a url. .. ipython:: python - url = ('https://raw.github.com/pandas-dev' - '/pandas/master/pandas/tests/io/data/csv/tips.csv') + url = ( + "https://raw.github.com/pandas-dev" + "/pandas/master/pandas/tests/io/data/csv/tips.csv" + ) tips = pd.read_csv(url) tips.head() @@ -139,16 +141,16 @@ the pandas command would be: .. 
code-block:: python - tips = pd.read_csv('tips.csv', sep='\t', header=None) + tips = pd.read_csv("tips.csv", sep="\t", header=None) # alternatively, read_table is an alias to read_csv with tab delimiter - tips = pd.read_table('tips.csv', header=None) + tips = pd.read_table("tips.csv", header=None) Pandas can also read Stata data sets in ``.dta`` format with the :func:`read_stata` function. .. code-block:: python - df = pd.read_stata('data.dta') + df = pd.read_stata("data.dta") In addition to text/csv and Stata files, pandas supports a variety of other data formats such as Excel, SAS, HDF5, Parquet, and SQL databases. These are all read via a ``pd.read_*`` @@ -168,13 +170,13 @@ Similarly in pandas, the opposite of ``read_csv`` is :meth:`DataFrame.to_csv`. .. code-block:: python - tips.to_csv('tips2.csv') + tips.to_csv("tips2.csv") Pandas can also export to Stata file format with the :meth:`DataFrame.to_stata` method. .. code-block:: python - tips.to_stata('tips2.dta') + tips.to_stata("tips2.dta") Data operations @@ -200,11 +202,11 @@ drops a column from the ``DataFrame``. .. ipython:: python - tips['total_bill'] = tips['total_bill'] - 2 - tips['new_bill'] = tips['total_bill'] / 2 + tips["total_bill"] = tips["total_bill"] - 2 + tips["new_bill"] = tips["total_bill"] / 2 tips.head() - tips = tips.drop('new_bill', axis=1) + tips = tips.drop("new_bill", axis=1) Filtering ~~~~~~~~~ @@ -220,7 +222,7 @@ DataFrames can be filtered in multiple ways; the most intuitive of which is usin .. ipython:: python - tips[tips['total_bill'] > 10].head() + tips[tips["total_bill"] > 10].head() If/then logic ~~~~~~~~~~~~~ @@ -237,13 +239,13 @@ the ``where`` method from ``numpy``. .. ipython:: python - tips['bucket'] = np.where(tips['total_bill'] < 10, 'low', 'high') + tips["bucket"] = np.where(tips["total_bill"] < 10, "low", "high") tips.head() .. ipython:: python :suppress: - tips = tips.drop('bucket', axis=1) + tips = tips.drop("bucket", axis=1) Date functionality ~~~~~~~~~~~~~~~~~~ @@ -273,22 +275,26 @@ see the :ref:`timeseries documentation` for more details. .. ipython:: python - tips['date1'] = pd.Timestamp('2013-01-15') - tips['date2'] = pd.Timestamp('2015-02-15') - tips['date1_year'] = tips['date1'].dt.year - tips['date2_month'] = tips['date2'].dt.month - tips['date1_next'] = tips['date1'] + pd.offsets.MonthBegin() - tips['months_between'] = (tips['date2'].dt.to_period('M') - - tips['date1'].dt.to_period('M')) + tips["date1"] = pd.Timestamp("2013-01-15") + tips["date2"] = pd.Timestamp("2015-02-15") + tips["date1_year"] = tips["date1"].dt.year + tips["date2_month"] = tips["date2"].dt.month + tips["date1_next"] = tips["date1"] + pd.offsets.MonthBegin() + tips["months_between"] = tips["date2"].dt.to_period("M") - tips[ + "date1" + ].dt.to_period("M") - tips[['date1', 'date2', 'date1_year', 'date2_month', 'date1_next', - 'months_between']].head() + tips[ + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] + ].head() .. ipython:: python :suppress: - tips = tips.drop(['date1', 'date2', 'date1_year', 'date2_month', - 'date1_next', 'months_between'], axis=1) + tips = tips.drop( + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], + axis=1, + ) Selection of columns ~~~~~~~~~~~~~~~~~~~~ @@ -310,13 +316,13 @@ to a variable. .. 
ipython:: python # keep - tips[['sex', 'total_bill', 'tip']].head() + tips[["sex", "total_bill", "tip"]].head() # drop - tips.drop('sex', axis=1).head() + tips.drop("sex", axis=1).head() # rename - tips.rename(columns={'total_bill': 'total_bill_2'}).head() + tips.rename(columns={"total_bill": "total_bill_2"}).head() Sorting by values @@ -333,7 +339,7 @@ takes a list of columns to sort by. .. ipython:: python - tips = tips.sort_values(['sex', 'total_bill']) + tips = tips.sort_values(["sex", "total_bill"]) tips.head() @@ -357,8 +363,8 @@ Use ``len`` and ``rstrip`` to exclude trailing blanks. .. ipython:: python - tips['time'].str.len().head() - tips['time'].str.rstrip().str.len().head() + tips["time"].str.len().head() + tips["time"].str.rstrip().str.len().head() Finding position of substring @@ -380,7 +386,7 @@ the function will return -1 if it fails to find the substring. .. ipython:: python - tips['sex'].str.find("ale").head() + tips["sex"].str.find("ale").head() Extracting substring by position @@ -398,7 +404,7 @@ indexes are zero-based. .. ipython:: python - tips['sex'].str[0:1].head() + tips["sex"].str[0:1].head() Extracting nth word @@ -425,9 +431,9 @@ approaches, but this just shows a simple approach. .. ipython:: python - firstlast = pd.DataFrame({'string': ['John Smith', 'Jane Cook']}) - firstlast['First_Name'] = firstlast['string'].str.split(" ", expand=True)[0] - firstlast['Last_Name'] = firstlast['string'].str.rsplit(" ", expand=True)[0] + firstlast = pd.DataFrame({"string": ["John Smith", "Jane Cook"]}) + firstlast["First_Name"] = firstlast["string"].str.split(" ", expand=True)[0] + firstlast["Last_Name"] = firstlast["string"].str.rsplit(" ", expand=True)[0] firstlast @@ -455,10 +461,10 @@ The equivalent Python functions are ``upper``, ``lower``, and ``title``. .. ipython:: python - firstlast = pd.DataFrame({'string': ['John Smith', 'Jane Cook']}) - firstlast['upper'] = firstlast['string'].str.upper() - firstlast['lower'] = firstlast['string'].str.lower() - firstlast['title'] = firstlast['string'].str.title() + firstlast = pd.DataFrame({"string": ["John Smith", "Jane Cook"]}) + firstlast["upper"] = firstlast["string"].str.upper() + firstlast["lower"] = firstlast["string"].str.lower() + firstlast["title"] = firstlast["string"].str.title() firstlast Merging @@ -468,11 +474,9 @@ The following tables will be used in the merge examples .. ipython:: python - df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], - 'value': np.random.randn(4)}) + df1 = pd.DataFrame({"key": ["A", "B", "C", "D"], "value": np.random.randn(4)}) df1 - df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], - 'value': np.random.randn(4)}) + df2 = pd.DataFrame({"key": ["B", "D", "D", "E"], "value": np.random.randn(4)}) df2 In Stata, to perform a merge, one data set must be in memory @@ -534,16 +538,16 @@ types are accomplished via the ``how`` keyword. .. ipython:: python - inner_join = df1.merge(df2, on=['key'], how='inner') + inner_join = df1.merge(df2, on=["key"], how="inner") inner_join - left_join = df1.merge(df2, on=['key'], how='left') + left_join = df1.merge(df2, on=["key"], how="left") left_join - right_join = df1.merge(df2, on=['key'], how='right') + right_join = df1.merge(df2, on=["key"], how="right") right_join - outer_join = df1.merge(df2, on=['key'], how='outer') + outer_join = df1.merge(df2, on=["key"], how="outer") outer_join @@ -558,8 +562,8 @@ operations, and is ignored by default for aggregations. .. 
ipython:: python outer_join - outer_join['value_x'] + outer_join['value_y'] - outer_join['value_x'].sum() + outer_join["value_x"] + outer_join["value_y"] + outer_join["value_x"].sum() One difference is that missing data cannot be compared to its sentinel value. For example, in Stata you could do this to filter missing values. @@ -576,8 +580,8 @@ should be used for comparisons. .. ipython:: python - outer_join[pd.isna(outer_join['value_x'])] - outer_join[pd.notna(outer_join['value_x'])] + outer_join[pd.isna(outer_join["value_x"])] + outer_join[pd.notna(outer_join["value_x"])] Pandas also provides a variety of methods to work with missing data -- some of which would be challenging to express in Stata. For example, there are methods to @@ -591,10 +595,10 @@ value, like the mean, or forward filling from previous rows. See the outer_join.dropna() # Fill forwards - outer_join.fillna(method='ffill') + outer_join.fillna(method="ffill") # Impute missing values with the mean - outer_join['value_x'].fillna(outer_join['value_x'].mean()) + outer_join["value_x"].fillna(outer_join["value_x"].mean()) GroupBy @@ -617,7 +621,7 @@ for more details and examples. .. ipython:: python - tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum() + tips_summed = tips.groupby(["sex", "smoker"])[["total_bill", "tip"]].sum() tips_summed.head() @@ -640,8 +644,8 @@ operation. .. ipython:: python - gb = tips.groupby('smoker')['total_bill'] - tips['adj_total_bill'] = tips['total_bill'] - gb.transform('mean') + gb = tips.groupby("smoker")["total_bill"] + tips["adj_total_bill"] = tips["total_bill"] - gb.transform("mean") tips.head() @@ -661,7 +665,7 @@ In pandas this would be written as: .. ipython:: python - tips.groupby(['sex', 'smoker']).first() + tips.groupby(["sex", "smoker"]).first() Other considerations From ba99f249f75e80e4804a2dd9161f1e8a17a099e8 Mon Sep 17 00:00:00 2001 From: PrayagS Date: Sat, 3 Oct 2020 01:31:41 +0530 Subject: [PATCH 2/3] DOC: fix flake8-rst errors --- .../comparison/comparison_with_r.rst | 156 +++++++++--------- .../comparison/comparison_with_sas.rst | 20 +-- .../comparison/comparison_with_stata.rst | 10 +- 3 files changed, 93 insertions(+), 93 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst index 123b911a97aaa..358bb6ad951f0 100644 --- a/doc/source/getting_started/comparison/comparison_with_r.rst +++ b/doc/source/getting_started/comparison/comparison_with_r.rst @@ -160,29 +160,29 @@ function. .. ipython:: python df = pd.DataFrame( - { - "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], - "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], - "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], - "by2": [ - "wet", - "dry", - 99, - 95, - np.nan, - "damp", - 95, - 99, - "red", - 99, - np.nan, - np.nan, - ], - } - ) - - g = df.groupby(["by1", "by2"]) - g[["v1", "v2"]].mean() + { + "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], + "by2": [ + "wet", + "dry", + 99, + 95, + np.nan, + "damp", + 95, + 99, + "red", + 99, + np.nan, + np.nan, + ], + } + ) + + g = df.groupby(["by1", "by2"]) + g[["v1", "v2"]].mean() For more details and examples see :ref:`the groupby documentation `. 
@@ -243,14 +243,14 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this: import string baseball = pd.DataFrame( - { - "team": ["team %d" % (x + 1) for x in range(5)] * 5, - "player": random.sample(list(string.ascii_lowercase), 25), - "batting avg": np.random.uniform(0.200, 0.400, 25), - } - ) + { + "team": ["team %d" % (x + 1) for x in range(5)] * 5, + "player": random.sample(list(string.ascii_lowercase), 25), + "batting avg": np.random.uniform(0.200, 0.400, 25), + } + ) - baseball.pivot_table(values="batting avg", columns="team", aggfunc=np.max) + baseball.pivot_table(values="batting avg", columns="team", aggfunc=np.max) For more details and examples see :ref:`the reshaping documentation `. @@ -353,14 +353,14 @@ In ``pandas`` the equivalent expression, using the .. ipython:: python df = pd.DataFrame( - { - "x": np.random.uniform(1.0, 168.0, 120), - "y": np.random.uniform(7.0, 334.0, 120), - "z": np.random.uniform(1.7, 20.7, 120), - "month": [5, 6, 7, 8] * 30, - "week": np.random.randint(1, 4, 120), - } - ) + { + "x": np.random.uniform(1.0, 168.0, 120), + "y": np.random.uniform(7.0, 334.0, 120), + "z": np.random.uniform(1.7, 20.7, 120), + "month": [5, 6, 7, 8] * 30, + "week": np.random.randint(1, 4, 120), + } + ) grouped = df.groupby(["month", "week"]) grouped["x"].agg([np.mean, np.std]) @@ -433,13 +433,13 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent: .. ipython:: python cheese = pd.DataFrame( - { - "first": ["John", "Mary"], - "last": ["Doe", "Bo"], - "height": [5.5, 6.0], - "weight": [130, 150], - } - ) + { + "first": ["John", "Mary"], + "last": ["Doe", "Bo"], + "height": [5.5, 6.0], + "weight": [130, 150], + } + ) pd.melt(cheese, id_vars=["first", "last"]) cheese.set_index(["first", "last"]).stack() # alternative way @@ -471,23 +471,23 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`: .. ipython:: python df = pd.DataFrame( - { - "x": np.random.uniform(1.0, 168.0, 12), - "y": np.random.uniform(7.0, 334.0, 12), - "z": np.random.uniform(1.7, 20.7, 12), - "month": [5, 6, 7] * 4, - "week": [1, 2] * 6, - } - ) - - mdf = pd.melt(df, id_vars=["month", "week"]) - pd.pivot_table( - mdf, - values="value", - index=["variable", "week"], - columns=["month"], - aggfunc=np.mean, - ) + { + "x": np.random.uniform(1.0, 168.0, 12), + "y": np.random.uniform(7.0, 334.0, 12), + "z": np.random.uniform(1.7, 20.7, 12), + "month": [5, 6, 7] * 4, + "week": [1, 2] * 6, + } + ) + + mdf = pd.melt(df, id_vars=["month", "week"]) + pd.pivot_table( + mdf, + values="value", + index=["variable", "week"], + columns=["month"], + aggfunc=np.mean, + ) Similarly for ``dcast`` which uses a data.frame called ``df`` in R to aggregate information based on ``Animal`` and ``FeedType``: @@ -511,22 +511,22 @@ using :meth:`~pandas.pivot_table`: .. 
ipython:: python df = pd.DataFrame( - { - "Animal": [ - "Animal1", - "Animal2", - "Animal3", - "Animal2", - "Animal1", - "Animal2", - "Animal3", - ], - "FeedType": ["A", "B", "A", "A", "B", "B", "A"], - "Amount": [10, 7, 4, 2, 5, 6, 2], - } - ) - - df.pivot_table(values="Amount", index="Animal", columns="FeedType", aggfunc="sum") + { + "Animal": [ + "Animal1", + "Animal2", + "Animal3", + "Animal2", + "Animal1", + "Animal2", + "Animal3", + ], + "FeedType": ["A", "B", "A", "A", "B", "B", "A"], + "Amount": [10, 7, 4, 2, 5, 6, 2], + } + ) + + df.pivot_table(values="Amount", index="Animal", columns="FeedType", aggfunc="sum") The second approach is to use the :meth:`~pandas.DataFrame.groupby` method: diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst index 381558b1359f7..ae9f1caebd556 100644 --- a/doc/source/getting_started/comparison/comparison_with_sas.rst +++ b/doc/source/getting_started/comparison/comparison_with_sas.rst @@ -131,11 +131,11 @@ The pandas method is :func:`read_csv`, which works similarly. .. ipython:: python url = ( - "https://raw.github.com/pandas-dev/" - "pandas/master/pandas/tests/io/data/csv/tips.csv" - ) - tips = pd.read_csv(url) - tips.head() + "https://raw.github.com/pandas-dev/" + "pandas/master/pandas/tests/io/data/csv/tips.csv" + ) + tips = pd.read_csv(url) + tips.head() Like ``PROC IMPORT``, ``read_csv`` can take a number of parameters to specify @@ -292,20 +292,20 @@ see the :ref:`timeseries documentation` for more details. tips["date2_month"] = tips["date2"].dt.month tips["date1_next"] = tips["date1"] + pd.offsets.MonthBegin() tips["months_between"] = tips["date2"].dt.to_period("M") - tips[ - "date1" + "date1" ].dt.to_period("M") tips[ - ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] ].head() .. ipython:: python :suppress: tips = tips.drop( - ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], - axis=1, - ) + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], + axis=1, + ) Selection of columns ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst index 498be88453fc7..9016b55aacb58 100644 --- a/doc/source/getting_started/comparison/comparison_with_stata.rst +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -128,8 +128,8 @@ the data set if presented with a url. .. ipython:: python url = ( - "https://raw.github.com/pandas-dev" - "/pandas/master/pandas/tests/io/data/csv/tips.csv" + "https://raw.github.com/pandas-dev" + "/pandas/master/pandas/tests/io/data/csv/tips.csv" ) tips = pd.read_csv(url) tips.head() @@ -281,18 +281,18 @@ see the :ref:`timeseries documentation` for more details. tips["date2_month"] = tips["date2"].dt.month tips["date1_next"] = tips["date1"] + pd.offsets.MonthBegin() tips["months_between"] = tips["date2"].dt.to_period("M") - tips[ - "date1" + "date1" ].dt.to_period("M") tips[ - ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"] ].head() .. 
ipython:: python :suppress: tips = tips.drop( - ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], + ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], axis=1, ) From 569a71aca2d81392cd274bac0877ef930f2f78fa Mon Sep 17 00:00:00 2001 From: PrayagS Date: Sat, 3 Oct 2020 01:48:00 +0530 Subject: [PATCH 3/3] DOC: fix E131 in comparison_with_stata.rst --- doc/source/getting_started/comparison/comparison_with_stata.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst index 9016b55aacb58..7b8d9c6be61db 100644 --- a/doc/source/getting_started/comparison/comparison_with_stata.rst +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -293,7 +293,7 @@ see the :ref:`timeseries documentation` for more details. tips = tips.drop( ["date1", "date2", "date1_year", "date2_month", "date1_next", "months_between"], - axis=1, + axis=1, ) Selection of columns
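
The patches above mechanically requote and re-indent the existing documentation examples to match black's output; no example's behavior changes. As a minimal sketch — not part of the patch series — the block below shows one way to verify that an individual snippet is already black-formatted, assuming the ``black`` package is importable and using its public ``format_file_contents`` API; the example snippet string is illustrative.

.. code-block:: python

    import black

    # An illustrative snippet in the double-quoted style these patches adopt.
    snippet = 'df = pd.DataFrame({"x": [1, 3, 5], "y": [2, 4, 6]})\n'

    try:
        # format_file_contents raises black.NothingChanged when the input
        # already matches what black would produce.
        black.format_file_contents(snippet, fast=False, mode=black.FileMode())
        print("snippet was not black-formatted")
    except black.NothingChanged:
        print("snippet is already black-formatted")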