From 2b2aaa87acdf3fc92b759759bbed19e5b228d6e5 Mon Sep 17 00:00:00 2001 From: rotuna Date: Mon, 8 Oct 2018 16:52:59 +0530 Subject: [PATCH 01/18] DOC: Added example for Excel Filters and Fill handle --- doc/source/comparison_with_excel.rst | 121 +++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 doc/source/comparison_with_excel.rst diff --git a/doc/source/comparison_with_excel.rst b/doc/source/comparison_with_excel.rst new file mode 100644 index 0000000000000..c3f9ed2997be9 --- /dev/null +++ b/doc/source/comparison_with_excel.rst @@ -0,0 +1,121 @@ +.. currentmodule:: pandas +.. _compare_with_excel: + +.. ipython:: python + :suppress: + + import pandas as pd + import random + pd.options.display.max_rows=15 + +Comparison with Excel +********************* + +Commonly used Excel functionalities +----------------------------------- + +Fill Handle +~~~~~~~~~~~ + +Create a series of numbers following a set pattern in a certain set of cells. In +Excel this would be done by shift+drag after entering the first number or by +entering the first two or three values and then dragging. + +This can be achieved by creating a series and assigning it to the desired cells. + +.. ipython:: python + + df = pd.DataFrame({'AAA': [1] * 8, 'BBB': list(range(0, 8))}); df + + series = list(range(1, 5)); series + + df.iloc[2:(5+1)].AAA = series + + df + +Filters +~~~~~~~ + +Filters can be achieved by using slicing. + +The examples filter by 0 on column AAA, and also show how to filter by multiple +values. + +.. ipython:: python + + df[df.AAA == 0] + + df[(df.AAA == 0) | (df.AAA == 2)] + + +Drop Duplicates +~~~~~~~~~~~~~~~ + +Another commonly used function is Drop Duplicates. This is directly supported in +pandas. + +.. 
ipython:: python + + df = pd.DataFrame({"class": ['A', 'A', 'A', 'B', 'C', 'D'], "student_count": [42, 35, 42, 50, 47, 45], "all_pass": ["Yes", "Yes", "Yes", "No", "No", "Yes"]}) + + df.drop_duplicates() + + df.drop_duplicates(["class", "student_count"]) + + +Pivot Table +~~~~~~~~~~~ + +This can be achieved by using ``pandas.pivot_table`` for examples and reference, +please see `pandas.pivot_table `__ + + +Formulae +~~~~~~~~ + +Let's create a new column "girls_count" and try to compute the number of boys in +each class. + +.. ipython:: python + + df["girls_count"] = [21, 12, 21, 31, 23, 17]; df + + def get_count(row): + return row["student_count"] - row["girls_count"] + + df["boys_count"] = df.apply(get_count, axis = 1); df + + +VLOOKUP +~~~~~~~ + +.. ipython:: python + + df1 = pd.DataFrame({"keys": [1, 2, 3, 4, 5, 6, 7], "first_names": ["harry", "ron", + "hermione", "rubius", "albus", "severus", "luna"]}); df1 + + random_names = pd.DataFrame({"surnames": ["hadrid", "malfoy", "lovegood", + "dumbledore", "grindelwald", "granger", "weasly", "riddle", "longbottom", + "snape"], "keys": [ random.randint(1,7) for x in range(0,10) ]}) + + random_names + + random_names.merge(df1, on="keys", how='left') + +Adding a row +~~~~~~~~~~~~ + +To appended a row, we can just assign values to an index using ``iloc``. + +NOTE: If the index already exists, the values in that index will be over written. + +.. ipython:: python + + df1.iloc[7] = [8, "tonks"]; df1 + + +Search and Replace +~~~~~~~~~~~~~~~~~~ + +The ``replace`` method that comes associated with the ``DataFrame`` object can perform +this function. Please see `pandas.DataFrame.replace `__ for examples. 
From ea23c33644286e61b2e9af9a361211071cc1254b Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Tue, 24 Nov 2020 00:24:57 -0500 Subject: [PATCH 02/18] DOC: move the comparison with Excel into the new folder --- .../comparison}/comparison_with_excel.rst | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) rename doc/source/{ => getting_started/comparison}/comparison_with_excel.rst (90%) diff --git a/doc/source/comparison_with_excel.rst b/doc/source/getting_started/comparison/comparison_with_excel.rst similarity index 90% rename from doc/source/comparison_with_excel.rst rename to doc/source/getting_started/comparison/comparison_with_excel.rst index c3f9ed2997be9..6e8c1f1e55c15 100644 --- a/doc/source/comparison_with_excel.rst +++ b/doc/source/getting_started/comparison/comparison_with_excel.rst @@ -1,10 +1,9 @@ -.. currentmodule:: pandas .. _compare_with_excel: .. ipython:: python :suppress: - import pandas as pd + import pandas as pd import random pd.options.display.max_rows=15 @@ -18,7 +17,7 @@ Fill Handle ~~~~~~~~~~~ Create a series of numbers following a set pattern in a certain set of cells. In -Excel this would be done by shift+drag after entering the first number or by +Excel this would be done by shift+drag after entering the first number or by entering the first two or three values and then dragging. This can be achieved by creating a series and assigning it to the desired cells. @@ -36,7 +35,7 @@ This can be achieved by creating a series and assigning it to the desired cells. Filters ~~~~~~~ -Filters can be achieved by using slicing. +Filters can be achieved by using slicing. The examples filter by 0 on column AAA, and also show how to filter by multiple values. @@ -51,7 +50,7 @@ values. Drop Duplicates ~~~~~~~~~~~~~~~ -Another commonly used function is Drop Duplicates. This is directly supported in +Another commonly used function is Drop Duplicates. This is directly supported in pandas. .. ipython:: python @@ -66,17 +65,17 @@ pandas. 
Pivot Table ~~~~~~~~~~~ -This can be achieved by using ``pandas.pivot_table`` for examples and reference, -please see `pandas.pivot_table `__ +This can be achieved by using ``pandas.pivot_table`` for examples and reference, +please see `pandas.pivot_table `__ Formulae ~~~~~~~~ -Let's create a new column "girls_count" and try to compute the number of boys in -each class. +Let's create a new column "girls_count" and try to compute the number of boys in +each class. -.. ipython:: python +.. ipython:: python df["girls_count"] = [21, 12, 21, 31, 23, 17]; df @@ -105,7 +104,7 @@ VLOOKUP Adding a row ~~~~~~~~~~~~ -To appended a row, we can just assign values to an index using ``iloc``. +To appended a row, we can just assign values to an index using ``iloc``. NOTE: If the index already exists, the values in that index will be over written. @@ -117,5 +116,5 @@ NOTE: If the index already exists, the values in that index will be over written Search and Replace ~~~~~~~~~~~~~~~~~~ -The ``replace`` method that comes associated with the ``DataFrame`` object can perform +The ``replace`` method that comes associated with the ``DataFrame`` object can perform this function. Please see `pandas.DataFrame.replace `__ for examples. 
From 799ea8b003c5e65f2645a1f2830837f857666768 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Wed, 16 Dec 2020 01:42:39 -0500 Subject: [PATCH 03/18] DOC: add introduction to Excel page --- .../getting_started/comparison/comparison_with_excel.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/source/getting_started/comparison/comparison_with_excel.rst b/doc/source/getting_started/comparison/comparison_with_excel.rst index 6e8c1f1e55c15..ff33ecf842284 100644 --- a/doc/source/getting_started/comparison/comparison_with_excel.rst +++ b/doc/source/getting_started/comparison/comparison_with_excel.rst @@ -10,6 +10,14 @@ Comparison with Excel ********************* +Since many potential pandas users have some familiarity with `Excel +`_, this page is meant to provide some examples of how +various Excel operations would be performed using pandas. Much of this will be the +same/similar in `Google Sheets `_, `LibreOffice +Calc `_, `Apple +Numbers `_, and other +Excel-compatible spreadsheet software. + Commonly used Excel functionalities ----------------------------------- From ed3951c9a479983c06675ddfb1ac68a796dce34e Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Wed, 16 Dec 2020 01:56:38 -0500 Subject: [PATCH 04/18] DOC: have excel page boilerplate match other comparison pages --- .../comparison/comparison_with_excel.rst | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_excel.rst b/doc/source/getting_started/comparison/comparison_with_excel.rst index ff33ecf842284..9ad9038c45192 100644 --- a/doc/source/getting_started/comparison/comparison_with_excel.rst +++ b/doc/source/getting_started/comparison/comparison_with_excel.rst @@ -1,11 +1,6 @@ .. _compare_with_excel: -.. 
ipython:: python - :suppress: - - import pandas as pd - import random - pd.options.display.max_rows=15 +{{ header }} Comparison with Excel ********************* @@ -18,6 +13,16 @@ Calc `_, and other Excel-compatible spreadsheet software. +If you're new to pandas, you might want to first read through :ref:`10 Minutes to pandas<10min>` +to familiarize yourself with the library. + +As is customary, we import pandas and NumPy as follows: + +.. ipython:: python + + import pandas as pd + import numpy as np + Commonly used Excel functionalities ----------------------------------- @@ -98,6 +103,8 @@ VLOOKUP .. ipython:: python + import random + df1 = pd.DataFrame({"keys": [1, 2, 3, 4, 5, 6, 7], "first_names": ["harry", "ron", "hermione", "rubius", "albus", "severus", "luna"]}); df1 From d53b7efd8cde5811ea15f7a66a6e37994d387925 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Wed, 16 Dec 2020 02:03:40 -0500 Subject: [PATCH 05/18] DOC: make shared include for the boilerplate text in comparison docs --- .../comparison/comparison_boilerplate.rst | 9 +++++++++ .../comparison/comparison_with_excel.rst | 10 +--------- .../comparison/comparison_with_sas.rst | 11 +---------- .../comparison/comparison_with_sql.rst | 10 +--------- .../comparison/comparison_with_stata.rst | 12 +----------- 5 files changed, 13 insertions(+), 39 deletions(-) create mode 100644 doc/source/getting_started/comparison/comparison_boilerplate.rst diff --git a/doc/source/getting_started/comparison/comparison_boilerplate.rst b/doc/source/getting_started/comparison/comparison_boilerplate.rst new file mode 100644 index 0000000000000..aedf2875dc452 --- /dev/null +++ b/doc/source/getting_started/comparison/comparison_boilerplate.rst @@ -0,0 +1,9 @@ +If you're new to pandas, you might want to first read through :ref:`10 Minutes to pandas<10min>` +to familiarize yourself with the library. + +As is customary, we import pandas and NumPy as follows: + +.. 
ipython:: python + + import pandas as pd + import numpy as np diff --git a/doc/source/getting_started/comparison/comparison_with_excel.rst b/doc/source/getting_started/comparison/comparison_with_excel.rst index 9ad9038c45192..44a244fbe73d5 100644 --- a/doc/source/getting_started/comparison/comparison_with_excel.rst +++ b/doc/source/getting_started/comparison/comparison_with_excel.rst @@ -13,15 +13,7 @@ Calc `_, and other Excel-compatible spreadsheet software. -If you're new to pandas, you might want to first read through :ref:`10 Minutes to pandas<10min>` -to familiarize yourself with the library. - -As is customary, we import pandas and NumPy as follows: - -.. ipython:: python - - import pandas as pd - import numpy as np +.. include:: comparison_boilerplate.rst Commonly used Excel functionalities ----------------------------------- diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst index ae9f1caebd556..721ce53d110f0 100644 --- a/doc/source/getting_started/comparison/comparison_with_sas.rst +++ b/doc/source/getting_started/comparison/comparison_with_sas.rst @@ -8,16 +8,7 @@ For potential users coming from `SAS ` -to familiarize yourself with the library. - -As is customary, we import pandas and NumPy as follows: - -.. ipython:: python - - import pandas as pd - import numpy as np - +.. include:: comparison_boilerplate.rst .. note:: diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index 6848d8df2e46b..4fe7b7e96cf50 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -8,15 +8,7 @@ Since many potential pandas users have some familiarity with `SQL `_, this page is meant to provide some examples of how various SQL operations would be performed using pandas. 
-If you're new to pandas, you might want to first read through :ref:`10 Minutes to pandas<10min>` -to familiarize yourself with the library. - -As is customary, we import pandas and NumPy as follows: - -.. ipython:: python - - import pandas as pd - import numpy as np +.. include:: comparison_boilerplate.rst Most of the examples will utilize the ``tips`` dataset found within pandas tests. We'll read the data into a DataFrame called ``tips`` and assume we have a database table of the same name and diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst index 014506cc18327..e78824e9b3f64 100644 --- a/doc/source/getting_started/comparison/comparison_with_stata.rst +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -8,17 +8,7 @@ For potential users coming from `Stata `__ this page is meant to demonstrate how different Stata operations would be performed in pandas. -If you're new to pandas, you might want to first read through :ref:`10 Minutes to pandas<10min>` -to familiarize yourself with the library. - -As is customary, we import pandas and NumPy as follows. This means that we can refer to the -libraries as ``pd`` and ``np``, respectively, for the rest of the document. - -.. ipython:: python - - import pandas as pd - import numpy as np - +.. include:: comparison_boilerplate.rst .. 
note:: From 39f030f4fc0a1868e4106469c6355683a7467ba7 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Wed, 16 Dec 2020 02:27:07 -0500 Subject: [PATCH 06/18] DOC: add links to Excel page from Getting Started --- doc/source/_static/logo_excel.svg | 27 +++++++++++++++++++ .../getting_started/comparison/index.rst | 1 + doc/source/getting_started/index.rst | 16 +++++++++++ 3 files changed, 44 insertions(+) create mode 100644 doc/source/_static/logo_excel.svg diff --git a/doc/source/_static/logo_excel.svg b/doc/source/_static/logo_excel.svg new file mode 100644 index 0000000000000..ffb25108df67c --- /dev/null +++ b/doc/source/_static/logo_excel.svg @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/source/getting_started/comparison/index.rst b/doc/source/getting_started/comparison/index.rst index 998706ce0c639..9b6eec7c5d42e 100644 --- a/doc/source/getting_started/comparison/index.rst +++ b/doc/source/getting_started/comparison/index.rst @@ -11,5 +11,6 @@ Comparison with other tools comparison_with_r comparison_with_sql + comparison_with_excel comparison_with_sas comparison_with_stata diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst index 6f6eeada0cfed..fdd3bab487eef 100644 --- a/doc/source/getting_started/index.rst +++ b/doc/source/getting_started/index.rst @@ -619,6 +619,22 @@ the pandas-equivalent operations compared to software you already know: :ref:`Learn more ` +.. raw:: html + + + + +
+
+ Excel logo +
+

Users of Excel + or other spreadsheet programs will find that many of the concepts are transferable to pandas.

+ +.. container:: custom-button + + :ref:`Learn more ` + .. raw:: html
From 6be29921b1c007900de866bfef1aceb4146fbe24 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Wed, 16 Dec 2020 03:17:45 -0500 Subject: [PATCH 07/18] DOC: add section on Data Structures to Excel page --- .../comparison/comparison_with_excel.rst | 46 +++++++++++++++++++ .../comparison/comparison_with_sas.rst | 7 ++- .../comparison/comparison_with_stata.rst | 7 ++- 3 files changed, 56 insertions(+), 4 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_excel.rst b/doc/source/getting_started/comparison/comparison_with_excel.rst index 44a244fbe73d5..c3a75e13b00d7 100644 --- a/doc/source/getting_started/comparison/comparison_with_excel.rst +++ b/doc/source/getting_started/comparison/comparison_with_excel.rst @@ -15,6 +15,52 @@ Excel-compatible spreadsheet software. .. include:: comparison_boilerplate.rst +Data structures +--------------- + +General terminology translation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. csv-table:: + :header: "pandas", "Excel" + :widths: 20, 20 + + ``DataFrame``, worksheet + ``Series``, column + ``Index``, row headings + row, row + ``NaN``, empty cell + +``DataFrame`` +~~~~~~~~~~~~~ + +A ``DataFrame`` in pandas is analogous to an Excel worksheet. While an Excel worksheet can contain +multiple worksheets, pandas ``DataFrame``s exist independently. + +``Series`` +~~~~~~~~~~ + +A ``Series`` is the data structure that represents one column of a ``DataFrame``. Working with a +``Series`` is analogous to referencing a column of a spreadsheet. + +``Index`` +~~~~~~~~~ + +Every ``DataFrame`` and ``Series`` has an ``Index``, which are labels on the *rows* of the data. In +pandas, if no index is specified, an integer index is used by default (first row = 0, second row = +1, and so on), analogous to row headings/numbers in Excel. + +In pandas, indexes can be set to one (or multiple) unique values, which is like having a column that +is used as the row identifier in a worksheet. 
Unlike Excel, these ``Index`` values can actually be +used to reference the rows. For example, in Excel, you would reference the first row as ``A1:Z1``, +while in pandas you could use ``populations.loc['Chicago']``. + +Index values are also persistent, so if you re-order the rows in a ``DataFrame``, the label for a +particular row doesn't change. + +See the :ref:`indexing documentation<indexing>` for much more on how to use an ``Index`` +effectively. + Commonly used Excel functionalities ----------------------------------- diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst index 721ce53d110f0..c6f508aae0e21 100644 --- a/doc/source/getting_started/comparison/comparison_with_sas.rst +++ b/doc/source/getting_started/comparison/comparison_with_sas.rst @@ -39,14 +39,17 @@ General terminology translation ``NaN``, ``.`` -``DataFrame`` / ``Series`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ +``DataFrame`` +~~~~~~~~~~~~~ A ``DataFrame`` in pandas is analogous to a SAS data set - a two-dimensional data source with labeled columns that can be of different types. As will be shown in this document, almost any operation that can be applied to a data set using SAS's ``DATA`` step, can also be accomplished in pandas. +``Series`` +~~~~~~~~~~ + A ``Series`` is the data structure that represents one column of a ``DataFrame``. 
SAS doesn't have a separate data structure for a single column, but in general, working with a ``Series`` is analogous to referencing a column diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst index e78824e9b3f64..b3ed9b1ba630f 100644 --- a/doc/source/getting_started/comparison/comparison_with_stata.rst +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -38,14 +38,17 @@ General terminology translation ``NaN``, ``.`` -``DataFrame`` / ``Series`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ +``DataFrame`` +~~~~~~~~~~~~~ A ``DataFrame`` in pandas is analogous to a Stata data set -- a two-dimensional data source with labeled columns that can be of different types. As will be shown in this document, almost any operation that can be applied to a data set in Stata can also be accomplished in pandas. +``Series`` +~~~~~~~~~~ + A ``Series`` is the data structure that represents one column of a ``DataFrame``. 
Stata doesn't have a separate data structure for a single column, but in general, working with a ``Series`` is analogous to referencing a column From 22f9f4f3e10ddabe25847eca162d275b2924e501 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Fri, 18 Dec 2020 00:33:47 -0500 Subject: [PATCH 08/18] DOC: fix CI errors on Excel page - Format Excel comparison code samples with [blacken-docs](https://github.com/asottile/blacken-docs) - Fix `SettingWithCopyWarning`s --- .../comparison/comparison_with_excel.rst | 72 ++++++++++++++----- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_excel.rst b/doc/source/getting_started/comparison/comparison_with_excel.rst index c3a75e13b00d7..1340b51dec788 100644 --- a/doc/source/getting_started/comparison/comparison_with_excel.rst +++ b/doc/source/getting_started/comparison/comparison_with_excel.rst @@ -35,7 +35,7 @@ General terminology translation ~~~~~~~~~~~~~ A ``DataFrame`` in pandas is analogous to an Excel worksheet. While an Excel worksheet can contain -multiple worksheets, pandas ``DataFrame``s exist independently. +multiple worksheets, pandas ``DataFrame``\s exist independently. ``Series`` ~~~~~~~~~~ @@ -75,11 +75,13 @@ This can be achieved by creating a series and assigning it to the desired cells. .. ipython:: python - df = pd.DataFrame({'AAA': [1] * 8, 'BBB': list(range(0, 8))}); df + df = pd.DataFrame({"AAA": [1] * 8, "BBB": list(range(0, 8))}) + df - series = list(range(1, 5)); series + series = list(range(1, 5)) + series - df.iloc[2:(5+1)].AAA = series + df.loc[2:5, "AAA"] = series df @@ -106,7 +108,13 @@ pandas. .. 
ipython:: python - df = pd.DataFrame({"class": ['A', 'A', 'A', 'B', 'C', 'D'], "student_count": [42, 35, 42, 50, 47, 45], "all_pass": ["Yes", "Yes", "Yes", "No", "No", "Yes"]}) + df = pd.DataFrame( + { + "class": ["A", "A", "A", "B", "C", "D"], + "student_count": [42, 35, 42, 50, 47, 45], + "all_pass": ["Yes", "Yes", "Yes", "No", "No", "Yes"], + } + ) df.drop_duplicates() @@ -128,12 +136,16 @@ each class. .. ipython:: python - df["girls_count"] = [21, 12, 21, 31, 23, 17]; df + df["girls_count"] = [21, 12, 21, 31, 23, 17] + df + def get_count(row): return row["student_count"] - row["girls_count"] - df["boys_count"] = df.apply(get_count, axis = 1); df + + df["boys_count"] = df.apply(get_count, axis=1) + df VLOOKUP @@ -143,27 +155,55 @@ VLOOKUP import random - df1 = pd.DataFrame({"keys": [1, 2, 3, 4, 5, 6, 7], "first_names": ["harry", "ron", - "hermione", "rubius", "albus", "severus", "luna"]}); df1 - - random_names = pd.DataFrame({"surnames": ["hadrid", "malfoy", "lovegood", - "dumbledore", "grindelwald", "granger", "weasly", "riddle", "longbottom", - "snape"], "keys": [ random.randint(1,7) for x in range(0,10) ]}) + df1 = pd.DataFrame( + { + "keys": [1, 2, 3, 4, 5, 6, 7], + "first_names": [ + "harry", + "ron", + "hermione", + "rubius", + "albus", + "severus", + "luna", + ], + } + ) + df1 + + random_names = pd.DataFrame( + { + "surnames": [ + "hadrid", + "malfoy", + "lovegood", + "dumbledore", + "grindelwald", + "granger", + "weasly", + "riddle", + "longbottom", + "snape", + ], + "keys": [random.randint(1, 7) for x in range(0, 10)], + } + ) random_names - random_names.merge(df1, on="keys", how='left') + random_names.merge(df1, on="keys", how="left") Adding a row ~~~~~~~~~~~~ -To appended a row, we can just assign values to an index using ``iloc``. +To append a row, we can just assign values to an index using ``loc``. NOTE: If the index already exists, the values in that index will be over written. .. 
ipython:: python - df1.iloc[7] = [8, "tonks"]; df1 + df1.loc[7] = [8, "tonks"] + df1 Search and Replace From b52201468a1a51123fc76529e15cbe9a48a69203 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Fri, 25 Dec 2020 12:01:24 -0500 Subject: [PATCH 09/18] DOC: improve formula documentation under Getting Started - Mention apply() in documentation around deriving columns - Simplify code for doing column subtraction in Excel doc --- .../comparison/comparison_with_excel.rst | 19 ++++++++++--------- .../intro_tutorials/05_add_columns.rst | 6 ++++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_excel.rst b/doc/source/getting_started/comparison/comparison_with_excel.rst index 1340b51dec788..35071517436bf 100644 --- a/doc/source/getting_started/comparison/comparison_with_excel.rst +++ b/doc/source/getting_started/comparison/comparison_with_excel.rst @@ -128,25 +128,26 @@ This can be achieved by using ``pandas.pivot_table`` for examples and reference, please see `pandas.pivot_table `__ -Formulae +Formulas ~~~~~~~~ -Let's create a new column "girls_count" and try to compute the number of boys in +In spreadsheets, `formulas `_ +are often created in individual cells and then `dragged `_ +into other cells to compute them for other columns. In pandas, you'll be doing more operations on +full columns. + +As an example, let's create a new column "girls_count" and try to compute the number of boys in each class. .. ipython:: python df["girls_count"] = [21, 12, 21, 31, 23, 17] df - - - def get_count(row): - return row["student_count"] - row["girls_count"] - - - df["boys_count"] = df.apply(get_count, axis=1) + df["boys_count"] = df["student_count"] - df["girls_count"] df +Note that we aren't having to tell it to do that subtraction cell-by-cell — pandas handles that for +us. See :ref:`10min_tut_05_columns` for more information. 
VLOOKUP ~~~~~~~ diff --git a/doc/source/getting_started/intro_tutorials/05_add_columns.rst b/doc/source/getting_started/intro_tutorials/05_add_columns.rst index a99c2c49585c5..6c7c6faf69114 100644 --- a/doc/source/getting_started/intro_tutorials/05_add_columns.rst +++ b/doc/source/getting_started/intro_tutorials/05_add_columns.rst @@ -107,11 +107,13 @@ values in each row*. -Also other mathematical operators (+, -, \*, /) or -logical operators (<, >, =,…) work element wise. The latter was already +Other mathematical operators (``+``, ``-``, ``*``, ``/``) and +logical operators (``<``, ``>``, ``==``, …) also work element-wise. The latter was already used in the :ref:`subset data tutorial <10min_tut_03_subset>` to filter rows of a table using a conditional expression. +If you need more advanced logic, you can use arbitrary Python code via :meth:`~DataFrame.apply`. + .. raw:: html