From c3e899ec98f0c21bf76d3f53d8bbcc25e158c380 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sat, 2 Nov 2019 11:39:52 +0000 Subject: [PATCH 1/4] Fix basics docs --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index d4657100c1291..2dcb46584f19e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,7 +47,6 @@ ignore = E402, # module level import not at top of file E711, # comparison to none should be 'if cond is none:' exclude = - doc/source/getting_started/basics.rst doc/source/development/contributing_docstring.rst From c3ddd7b1d83f3ba125b6b78def97585b252588e6 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sat, 2 Nov 2019 17:11:13 +0000 Subject: [PATCH 2/4] Fix pipe example in basics --- doc/source/getting_started/basics.rst | 44 ++++++++++++++++++++------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index 9b97aa25a9240..6f36e4085abb1 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -753,28 +753,50 @@ on an entire ``DataFrame`` or ``Series``, row- or column-wise, or elementwise. Tablewise function application ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``DataFrames`` and ``Series`` can of course just be passed into functions. +``DataFrames`` and ``Series`` can be passed into functions. However, if the function needs to be called in a chain, consider using the :meth:`~DataFrame.pipe` method. -Compare the following -.. code-block:: python +First some setup: + +.. ipython:: python - # f, g, and h are functions taking and returning ``DataFrames`` - >>> f(g(h(df), arg1=1), arg2=2, arg3=3) + def extract_city_name(df): + """ + Chicago, IL -> Chicago for city_name column + """ + df['city_name'] = df['city_and_code'].apply(lambda x: x.split(', ')[0]) + return df -with the equivalent + def add_country_name(df, country_name=None): + """ + Chicago -> Chicago-US for city_name column + """ + df['city_and_country'] = df['city_name'].apply(lambda x: f'{x}-{country_name}') + return df -.. code-block:: python + df_p = pd.DataFrame({'city_and_code': ['Chicago, IL']}) + + +``extract_city_name`` and ``add_country_name`` are functions taking and returning ``DataFrames``. + +Now compare the following: + +.. ipython:: python + + add_country_name(extract_city_name(df_p), country_name='US') + +Is equivalent to: + +.. ipython:: python - >>> (df.pipe(h) - ... .pipe(g, arg1=1) - ... .pipe(f, arg2=2, arg3=3)) + (df_p.pipe(extract_city_name) + .pipe(add_country_name, country_name="US")) Pandas encourages the second style, which is known as method chaining. ``pipe`` makes it easy to use your own or another library's functions in method chains, alongside pandas' methods. -In the example above, the functions ``f``, ``g``, and ``h`` each expected the ``DataFrame`` as the first positional argument. +In the example above, the functions ``extract_city_name`` and ``add_country_name`` each expected a ``DataFrame`` as the first positional argument. What if the function you wish to apply takes its data as, say, the second argument? In this case, provide ``pipe`` with a tuple of ``(callable, data_keyword)``. ``.pipe`` will route the ``DataFrame`` to the argument specified in the tuple. From 0cf357176b4489f585163c766443bdad536925ed Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sat, 2 Nov 2019 17:41:52 +0000 Subject: [PATCH 3/4] flake8-rst --- doc/source/getting_started/basics.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index 6f36e4085abb1..969327334f99f 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -771,7 +771,8 @@ First some setup: """ Chicago -> Chicago-US for city_name column """ - df['city_and_country'] = df['city_name'].apply(lambda x: f'{x}-{country_name}') + col = 'city_name' + df['city_and_country'] = df[col].apply(lambda x: f'{x}-{country_name}') return df df_p = pd.DataFrame({'city_and_code': ['Chicago, IL']}) @@ -790,7 +791,7 @@ Is equivalent to: .. ipython:: python (df_p.pipe(extract_city_name) - .pipe(add_country_name, country_name="US")) + .pipe(add_country_name, country_name="US")) Pandas encourages the second style, which is known as method chaining. ``pipe`` makes it easy to use your own or another library's functions From 655e0c5f5cd707a61706cf507a3bbcf3158a4ce4 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Mon, 4 Nov 2019 23:00:16 +0000 Subject: [PATCH 4/4] Update as per comments --- doc/source/getting_started/basics.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index 969327334f99f..125990f7cadcd 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -764,7 +764,7 @@ First some setup: """ Chicago, IL -> Chicago for city_name column """ - df['city_name'] = df['city_and_code'].apply(lambda x: x.split(', ')[0]) + df['city_name'] = df['city_and_code'].str.split(",").str.get(0) return df def add_country_name(df, country_name=None): @@ -772,7 +772,7 @@ First some setup: Chicago -> Chicago-US for city_name column """ col = 'city_name' - df['city_and_country'] = df[col].apply(lambda x: f'{x}-{country_name}') + df['city_and_country'] = df[col] + country_name return df df_p = pd.DataFrame({'city_and_code': ['Chicago, IL']})