From 2976283611fc711c2452612d8fd45225cf7a308d Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Thu, 1 Oct 2020 19:04:31 +0200 Subject: [PATCH 1/6] reformat documentation --- .../06_calculate_statistics.rst | 6 ++- .../07_reshape_table_layout.rst | 39 ++++++++++++------- .../intro_tutorials/08_combine_dataframes.rst | 14 +++++-- .../intro_tutorials/09_timeseries.rst | 7 ++-- .../intro_tutorials/10_text_data.rst | 5 ++- test.py | 3 ++ 6 files changed, 49 insertions(+), 25 deletions(-) create mode 100644 test.py diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst index bd85160d2622a..c7cb54cddb2bf 100644 --- a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst +++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst @@ -122,8 +122,10 @@ aggregating statistics for given columns can be defined using the .. ipython:: python - titanic.agg({'Age': ['min', 'max', 'median', 'skew'], - 'Fare': ['min', 'max', 'median', 'mean']}) + titanic.agg({ + "Age": ["min", "max", "median", "skew"], + "Fare": ["min", "max", "median", "mean"] + }) .. raw:: html diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index c16fec6aaba9f..a04a2a5743de5 100644 --- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -101,8 +101,11 @@ measurement. .. ipython:: python - air_quality = pd.read_csv("data/air_quality_long.csv", - index_col="date.utc", parse_dates=True) + air_quality = pd.read_csv( + "data/air_quality_long.csv", + index_col="date.utc", + parse_dates=True + ) air_quality.head() .. raw:: html @@ -247,8 +250,10 @@ I want the mean concentrations for :math:`NO_2` and :math:`PM_{2.5}` in each of .. ipython:: python - air_quality.pivot_table(values="value", index="location", - columns="parameter", aggfunc="mean") + air_quality.pivot_table( + values="value", index="location", + columns="parameter", aggfunc="mean" +) In the case of :meth:`~DataFrame.pivot`, the data is only rearranged. When multiple values need to be aggregated (in this specific case, the values on @@ -266,9 +271,13 @@ the ``margin`` parameter to ``True``: .. ipython:: python - air_quality.pivot_table(values="value", index="location", - columns="parameter", aggfunc="mean", - margins=True) + air_quality.pivot_table( + values="value", + index="location", + columns="parameter", + aggfunc="mean", + margins=True + ) .. raw:: html @@ -345,12 +354,16 @@ The :func:`pandas.melt` method can be defined in more detail: .. ipython:: python - no_2 = no2_pivoted.melt(id_vars="date.utc", - value_vars=["BETR801", - "FR04014", - "London Westminster"], - value_name="NO_2", - var_name="id_location") + no_2 = no2_pivoted.melt( + id_vars="date.utc", + value_vars=[ + "BETR801", + "FR04014", + "London Westminster" + ], + value_name="NO_2", + var_name="id_location" + ) no_2.head() The result in the same, but in more detail defined: diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst index d6da9a0aa4f22..ecbfc61cfb4d3 100644 --- a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst +++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst @@ -155,8 +155,10 @@ index. For example: .. ipython:: python - air_quality_ = pd.concat([air_quality_pm25, air_quality_no2], - keys=["PM25", "NO2"]) + air_quality_ = pd.concat([ + air_quality_pm25, air_quality_no2], + keys=["PM25", "NO2" + ]) .. ipython:: python @@ -233,8 +235,12 @@ Add the station coordinates, provided by the stations metadata table, to the cor .. ipython:: python - air_quality = pd.merge(air_quality, stations_coord, - how='left', on='location') + air_quality = pd.merge( + air_quality, + stations_coord, + how='left', + on='location' + ) air_quality.head() Using the :meth:`~pandas.merge` function, for each of the rows in the diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst index 19351e0e3bc75..598d3514baa15 100644 --- a/doc/source/getting_started/intro_tutorials/09_timeseries.rst +++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst @@ -204,10 +204,9 @@ Plot the typical :math:`NO_2` pattern during the day of our time series of all s .. ipython:: python fig, axs = plt.subplots(figsize=(12, 4)) - air_quality.groupby( - air_quality["datetime"].dt.hour)["value"].mean().plot(kind='bar', - rot=0, - ax=axs) + air_quality.groupby(air_quality["datetime"].dt.hour)["value"].mean().plot( + kind='bar', rot=0, ax=axs + ) plt.xlabel("Hour of the day"); # custom x label using matplotlib @savefig 09_bar_chart.png plt.ylabel("$NO_2 (µg/m^3)$"); diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst index 93ad35fb1960b..a8ad4b5e03788 100644 --- a/doc/source/getting_started/intro_tutorials/10_text_data.rst +++ b/doc/source/getting_started/intro_tutorials/10_text_data.rst @@ -224,8 +224,9 @@ In the "Sex" column, replace values of "male" by "M" and values of "female" by " .. ipython:: python - titanic["Sex_short"] = titanic["Sex"].replace({"male": "M", - "female": "F"}) + titanic["Sex_short"] = titanic["Sex"].replace({ + "male": "M", "female": "F" + }) titanic["Sex_short"] Whereas :meth:`~Series.replace` is not a string method, it provides a convenient way diff --git a/test.py b/test.py new file mode 100644 index 0000000000000..1950b15c290c6 --- /dev/null +++ b/test.py @@ -0,0 +1,3 @@ +titanic["Sex_short"] = titanic["Sex"].replace({ + "male": "M", "female": "F" +}) \ No newline at end of file From ad7bb1d77bc8de9ee54799916563952bf370c878 Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Thu, 1 Oct 2020 19:06:33 +0200 Subject: [PATCH 2/6] remove test file --- test.py | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 test.py diff --git a/test.py b/test.py deleted file mode 100644 index 1950b15c290c6..0000000000000 --- a/test.py +++ /dev/null @@ -1,3 +0,0 @@ -titanic["Sex_short"] = titanic["Sex"].replace({ - "male": "M", "female": "F" -}) \ No newline at end of file From 4aa267acc1c08bb98b45417573f47578cdca5430 Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Thu, 1 Oct 2020 19:19:53 +0200 Subject: [PATCH 3/6] more code formatting --- doc/source/user_guide/10min.rst | 76 ++++++++++++++++++++------------ doc/source/user_guide/sparse.rst | 35 ++++++++------- 2 files changed, 66 insertions(+), 45 deletions(-) diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index c3746cbe777a3..b59a684faea06 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -43,12 +43,16 @@ Creating a :class:`DataFrame` by passing a dict of objects that can be converted .. ipython:: python - df2 = pd.DataFrame({'A': 1., - 'B': pd.Timestamp('20130102'), - 'C': pd.Series(1, index=list(range(4)), dtype='float32'), - 'D': np.array([3] * 4, dtype='int32'), - 'E': pd.Categorical(["test", "train", "test", "train"]), - 'F': 'foo'}) + df2 = pd.DataFrame( + { + "A": 1.0, + "B": pd.Timestamp("20130102"), + "C": pd.Series(1, index=list(range(4)), dtype="float32"), + "D": np.array([3] * 4, dtype="int32"), + "E": pd.Categorical(["test", "train", "test", "train"]), + "F": "foo", + } + ) df2 The columns of the resulting :class:`DataFrame` have different @@ -512,12 +516,14 @@ See the :ref:`Grouping section `. .. ipython:: python - df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) + df = pd.DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) df Grouping and then applying the :meth:`~pandas.core.groupby.GroupBy.sum` function to the resulting @@ -545,10 +551,14 @@ Stack .. ipython:: python - tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', - 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', - 'one', 'two', 'one', 'two']])) + tuples = list( + zip( + *[ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + ) + ) index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second']) df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B']) df2 = df[:4] @@ -578,11 +588,15 @@ See the section on :ref:`Pivot Tables `. .. ipython:: python - df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3, - 'B': ['A', 'B', 'C'] * 4, - 'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2, - 'D': np.random.randn(12), - 'E': np.random.randn(12)}) + df = pd.DataFrame( + { + "A": ["one", "one", "two", "three"] * 3, + "B": ["A", "B", "C"] * 4, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 2, + "D": np.random.randn(12), + "E": np.random.randn(12), + } + ) df We can produce pivot tables from this data very easily: @@ -653,8 +667,10 @@ pandas can include categorical data in a :class:`DataFrame`. For full docs, see .. ipython:: python - df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], - "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) + df = pd.DataFrame( + {"id": [1, 2, 3, 4, 5, 6], "raw_grade": ["a", "b", "b", "a", "a", "e"]} + ) + Convert the raw grades to a categorical data type. @@ -674,8 +690,9 @@ Reorder the categories and simultaneously add the missing categories (methods un .. ipython:: python - df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", - "good", "very good"]) + df["grade"] = df["grade"].cat.set_categories( + ["very bad", "bad", "medium","good", "very good"] + ) df["grade"] Sorting is per order in the categories, not lexical order. @@ -705,8 +722,7 @@ We use the standard convention for referencing the matplotlib API: .. ipython:: python - ts = pd.Series(np.random.randn(1000), - index=pd.date_range('1/1/2000', periods=1000)) + ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000)) ts = ts.cumsum() @savefig series_plot_basic.png @@ -717,8 +733,10 @@ of the columns with labels: .. ipython:: python - df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, - columns=['A', 'B', 'C', 'D']) + df = pd.DataFrame( + np.random.randn(1000, 4), index=ts.index, columns=["A", "B", "C", "D"] + ) + df = df.cumsum() plt.figure() diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst index 35e0e0fb86472..62e35cb994faf 100644 --- a/doc/source/user_guide/sparse.rst +++ b/doc/source/user_guide/sparse.rst @@ -303,14 +303,17 @@ The method requires a ``MultiIndex`` with two or more levels. .. ipython:: python s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) - s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), - (1, 2, 'a', 1), - (1, 1, 'b', 0), - (1, 1, 'b', 1), - (2, 1, 'b', 0), - (2, 1, 'b', 1)], - names=['A', 'B', 'C', 'D']) - s + s.index = pd.MultiIndex.from_tuples( + [ + (1, 2, "a", 0), + (1, 2, "a", 1), + (1, 1, "b", 0), + (1, 1, "b", 1), + (2, 1, "b", 0), + (2, 1, "b", 1), + ], + names=["A", "B", "C", "D"], + ) ss = s.astype('Sparse') ss @@ -318,9 +321,10 @@ In the example below, we transform the ``Series`` to a sparse representation of .. ipython:: python - A, rows, columns = ss.sparse.to_coo(row_levels=['A', 'B'], - column_levels=['C', 'D'], - sort_labels=True) + A, rows, columns = ss.sparse.to_coo( + row_levels=["A", "B"], column_levels=["C", "D"], sort_labels=True + ) + A A.todense() @@ -331,9 +335,9 @@ Specifying different row and column labels (and not sorting them) yields a diffe .. ipython:: python - A, rows, columns = ss.sparse.to_coo(row_levels=['A', 'B', 'C'], - column_levels=['D'], - sort_labels=False) + A, rows, columns = ss.sparse.to_coo( + row_levels=["A", "B", "C"], column_levels=["D"], sort_labels=False + ) A A.todense() @@ -345,8 +349,7 @@ A convenience method :meth:`Series.sparse.from_coo` is implemented for creating .. ipython:: python from scipy import sparse - A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), - shape=(3, 4)) + A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4)) A A.todense() From 9226749ab0fb38fc0403bb738c2665625e214a0d Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Thu, 1 Oct 2020 23:07:34 +0200 Subject: [PATCH 4/6] all changes with black --- .../intro_tutorials/06_calculate_statistics.rst | 7 +++---- .../intro_tutorials/07_reshape_table_layout.rst | 17 +++++------------ .../intro_tutorials/08_combine_dataframes.rst | 12 ++---------- .../intro_tutorials/10_text_data.rst | 4 +--- 4 files changed, 11 insertions(+), 29 deletions(-) diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst index c7cb54cddb2bf..7e919777fdf03 100644 --- a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst +++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst @@ -122,10 +122,9 @@ aggregating statistics for given columns can be defined using the .. ipython:: python - titanic.agg({ - "Age": ["min", "max", "median", "skew"], - "Fare": ["min", "max", "median", "mean"] - }) + titanic.agg( + {"Age": ["min", "max", "median", "skew"], "Fare": ["min", "max", "median", "mean"]} + ) .. raw:: html diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index a04a2a5743de5..706456b47273a 100644 --- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -102,9 +102,7 @@ measurement. .. ipython:: python air_quality = pd.read_csv( - "data/air_quality_long.csv", - index_col="date.utc", - parse_dates=True + "data/air_quality_long.csv", index_col="date.utc", parse_dates=True ) air_quality.head() @@ -251,9 +249,8 @@ I want the mean concentrations for :math:`NO_2` and :math:`PM_{2.5}` in each of .. ipython:: python air_quality.pivot_table( - values="value", index="location", - columns="parameter", aggfunc="mean" -) + values="value", index="location", columns="parameter", aggfunc="mean" + ) In the case of :meth:`~DataFrame.pivot`, the data is only rearranged. When multiple values need to be aggregated (in this specific case, the values on @@ -356,13 +353,9 @@ The :func:`pandas.melt` method can be defined in more detail: no_2 = no2_pivoted.melt( id_vars="date.utc", - value_vars=[ - "BETR801", - "FR04014", - "London Westminster" - ], + value_vars=["BETR801", "FR04014", "London Westminster"], value_name="NO_2", - var_name="id_location" + var_name="id_location", ) no_2.head() diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst index ecbfc61cfb4d3..be4c284912db4 100644 --- a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst +++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst @@ -155,10 +155,7 @@ index. For example: .. ipython:: python - air_quality_ = pd.concat([ - air_quality_pm25, air_quality_no2], - keys=["PM25", "NO2" - ]) + air_quality_ = pd.concat([air_quality_pm25, air_quality_no2], keys=["PM25", "NO2"]) .. ipython:: python @@ -235,12 +232,7 @@ Add the station coordinates, provided by the stations metadata table, to the cor .. ipython:: python - air_quality = pd.merge( - air_quality, - stations_coord, - how='left', - on='location' - ) + air_quality = pd.merge(air_quality, stations_coord, how="left", on="location") air_quality.head() Using the :meth:`~pandas.merge` function, for each of the rows in the diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst index a8ad4b5e03788..b7fb99a98d78f 100644 --- a/doc/source/getting_started/intro_tutorials/10_text_data.rst +++ b/doc/source/getting_started/intro_tutorials/10_text_data.rst @@ -224,9 +224,7 @@ In the "Sex" column, replace values of "male" by "M" and values of "female" by " .. ipython:: python - titanic["Sex_short"] = titanic["Sex"].replace({ - "male": "M", "female": "F" - }) + titanic["Sex_short"] = titanic["Sex"].replace({"male": "M", "female": "F"}) titanic["Sex_short"] Whereas :meth:`~Series.replace` is not a string method, it provides a convenient way From 8705ae9895b5ea3cc6439eb31772d896d95f9cae Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Thu, 1 Oct 2020 23:39:01 +0200 Subject: [PATCH 5/6] add whitespace --- doc/source/user_guide/10min.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index b59a684faea06..673f8689736f1 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -691,7 +691,7 @@ Reorder the categories and simultaneously add the missing categories (methods un .. ipython:: python df["grade"] = df["grade"].cat.set_categories( - ["very bad", "bad", "medium","good", "very good"] + ["very bad", "bad", "medium", "good", "very good"] ) df["grade"] From 93404d69e4d1d2633e22bdb226a732f355363ecb Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Thu, 1 Oct 2020 18:38:28 -0500 Subject: [PATCH 6/6] Update doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst --- .../getting_started/intro_tutorials/07_reshape_table_layout.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index 706456b47273a..20c36133330c4 100644 --- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -273,7 +273,7 @@ the ``margin`` parameter to ``True``: index="location", columns="parameter", aggfunc="mean", - margins=True + margins=True, ) .. raw:: html