From 2f359b9062d2ead55a44b3db1180fc2f6f187cd2 Mon Sep 17 00:00:00 2001 From: Aaron Kosel Date: Sat, 10 Mar 2018 14:07:25 -0600 Subject: [PATCH 1/8] DOC: update the DataFrame.loc[] docstring --- pandas/core/indexing.py | 48 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ec2874b3bae95..6f70a611e1438 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1413,10 +1413,13 @@ def _get_slice_axis(self, slice_obj, axis=None): class _LocIndexer(_LocationIndexer): - """Purely label-location based indexer for selection by label. + """ + Selects a group of rows and columns by label(s) or a boolean array. ``.loc[]`` is primarily label based, but may also be used with a - boolean array. + boolean array. Note that if no row or column labels are specified + the labels will default to the integers 0 to n - 1, with n being + the number of rows/columns, respectively. Allowed inputs are: @@ -1426,7 +1429,7 @@ class _LocIndexer(_LocationIndexer): - A list or array of labels, e.g. ``['a', 'b', 'c']``. - A slice object with labels, e.g. ``'a':'f'`` (note that contrary to usual python slices, **both** the start and the stop are included!). - - A boolean array. + - A boolean array, e.g. [True, False, True]. - A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and that returns valid output for indexing (one of the above) @@ -1434,6 +1437,45 @@ class _LocIndexer(_LocationIndexer): See more at :ref:`Selection by Label <indexing.label>` + See Also + -------- + at : Selects a single value for a row/column label pair + iat : Selects a single value for a row/column pair by integer position + iloc : Selects group of rows and columns by integer position(s) + + Examples + -------- + >>> df = pd.DataFrame([[12, 2, 3], [0, 4, 1], [10, 20, 30]], + ... 
index=['r0', 'r1', 'r2'], columns=['c0', 'c1', 'c2']) + >>> df + c0 c1 c2 + r0 12 2 3 + r1 0 4 1 + r2 10 20 30 + >>> df.loc['r1'] + c0 0 + c1 4 + c2 1 + Name: r1, dtype: int64 + >>> df.loc[['r1', 'r2']] + c0 c1 c2 + r1 0 4 1 + r2 10 20 30 + >>> df.loc['r0', 'c1'] + 2 + >>> df.loc['r0':'r1', 'c0'] + r0 12 + r1 0 + Name: c0, dtype: int64 + >>> df.loc[[False, False, True]] + c0 c1 c2 + r2 10 20 30 + >>> df.loc[df['c1'] > 10] + c0 c1 c2 + r2 10 20 30 + >>> df.loc[df['c1'] > 10, ['c0', 'c2']] + c0 c2 + r2 10 30 """ _valid_types = ("labels (MUST BE IN THE INDEX), slices of labels (BOTH " From 1a93d2a57fb85303d540ac322fe339fe1dd6462d Mon Sep 17 00:00:00 2001 From: Aaron Kosel Date: Sat, 10 Mar 2018 16:24:31 -0600 Subject: [PATCH 2/8] More labels for examples. More examples. Update wording based on PR comments --- pandas/core/indexing.py | 72 ++++++++++++++++++++++++++++++++++------- 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6f70a611e1438..b91ec91654f40 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1417,9 +1417,7 @@ class _LocIndexer(_LocationIndexer): Selects a group of rows and columns by label(s) or a boolean array. ``.loc[]`` is primarily label based, but may also be used with a - boolean array. Note that if no row or column labels are specified - the labels will default to the integers 0 to n - 1, with n being - the number of rows/columns, respectively. + boolean array. Allowed inputs are: @@ -1429,19 +1427,19 @@ class _LocIndexer(_LocationIndexer): - A list or array of labels, e.g. ``['a', 'b', 'c']``. - A slice object with labels, e.g. ``'a':'f'`` (note that contrary to usual python slices, **both** the start and the stop are included!). - - A boolean array, e.g. [True, False, True]. + - A boolean array of the same length as the axis being sliced, + e.g. ``[True, False, True]``. 
- A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and that returns valid output for indexing (one of the above) - ``.loc`` will raise a ``KeyError`` when the items are not found. - See more at :ref:`Selection by Label <indexing.label>` See Also -------- - at : Selects a single value for a row/column label pair - iat : Selects a single value for a row/column pair by integer position - iloc : Selects group of rows and columns by integer position(s) + DateFrame.at : Access a single value for a row/column label pair + DateFrame.iat : Access a single value for a row/column pair by integer + position + DateFrame.iloc : Access group of rows and columns by integer position(s) Examples -------- @@ -1452,30 +1450,82 @@ class _LocIndexer(_LocationIndexer): r0 12 2 3 r1 0 4 1 r2 10 20 30 + + Single label for row (note it would be faster to use ``DateFrame.at`` in + this case) + >>> df.loc['r1'] c0 0 c1 4 c2 1 Name: r1, dtype: int64 + + + Single label for row and column (note it would be faster to use + ``DateFrame.at`` in this case) + + >>> df.loc['r0', 'c1'] + 2 + + + A list of labels + >>> df.loc[['r1', 'r2']] c0 c1 c2 r1 0 4 1 r2 10 20 30 - >>> df.loc['r0', 'c1'] - 2 + + Slice with labels for row and single label for column. Note that + contrary to usual python slices, both the start and the stop are + included! + >>> df.loc['r0':'r1', 'c0'] r0 12 r1 0 Name: c0, dtype: int64 + + + Boolean list with the same length as the row axis + >>> df.loc[[False, False, True]] c0 c1 c2 r2 10 20 30 + + Callable that returns valid output for indexing + >>> df.loc[df['c1'] > 10] c0 c1 c2 r2 10 20 30 + + Callable that returns valid output with column labels specified + >>> df.loc[df['c1'] > 10, ['c0', 'c2']] c0 c2 r2 10 30 + + Another example using integers for the index + + >>> df = pd.DataFrame([[12, 2, 3], [0, 4, 1], [10, 20, 30]], + ... 
index=[7, 8, 9], columns=['c0', 'c1', 'c2']) + >>> df + c0 c1 c2 + 7 12 2 3 + 8 0 4 1 + 9 10 20 30 + + Slice with integer labels for rows. Note that contrary to usual + python slices, both the start and the stop are included! + + >>> df.loc[7:9] + c0 c1 c2 + 7 12 2 3 + 8 0 4 1 + 9 10 20 30 + + Raises + ------ + KeyError: + when items are not found """ _valid_types = ("labels (MUST BE IN THE INDEX), slices of labels (BOTH " From a3238d92c769f7d2bdb0aa7a932293de031ec31a Mon Sep 17 00:00:00 2001 From: Aaron Kosel Date: Sat, 10 Mar 2018 17:08:28 -0600 Subject: [PATCH 3/8] Remove lines and be consistent with wording --- pandas/core/indexing.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b91ec91654f40..fbbb25e023493 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1414,7 +1414,7 @@ def _get_slice_axis(self, slice_obj, axis=None): class _LocIndexer(_LocationIndexer): """ - Selects a group of rows and columns by label(s) or a boolean array. + Access a group of rows and columns by label(s) or a boolean array. ``.loc[]`` is primarily label based, but may also be used with a boolean array. 
@@ -1460,14 +1460,12 @@ class _LocIndexer(_LocationIndexer): c2 1 Name: r1, dtype: int64 - Single label for row and column (note it would be faster to use ``DateFrame.at`` in this case) >>> df.loc['r0', 'c1'] 2 - A list of labels >>> df.loc[['r1', 'r2']] From 78f342c190b3e909c348dd4ca02fd4b61c9c1ecc Mon Sep 17 00:00:00 2001 From: Aaron Kosel Date: Sat, 10 Mar 2018 18:05:06 -0600 Subject: [PATCH 4/8] Add examples of setting values with loc --- pandas/core/indexing.py | 47 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fbbb25e023493..8370c19123def 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1436,10 +1436,12 @@ class _LocIndexer(_LocationIndexer): See Also -------- - DateFrame.at : Access a single value for a row/column label pair - DateFrame.iat : Access a single value for a row/column pair by integer - position - DateFrame.iloc : Access group of rows and columns by integer position(s) + DateFrame.at + Access a single value for a row/column label pair + DateFrame.iat + Access a single value for a row/column pair by integer position + DateFrame.iloc + Access group of rows and columns by integer position(s) Examples -------- @@ -1482,7 +1484,6 @@ class _LocIndexer(_LocationIndexer): r1 0 Name: c0, dtype: int64 - Boolean list with the same length as the row axis >>> df.loc[[False, False, True]] @@ -1501,6 +1502,42 @@ class _LocIndexer(_LocationIndexer): c0 c2 r2 10 30 + Set value for all items matching the list of labels + + >>> df.loc[['r1', 'r2'], ['c1']] = 70 + >>> df + c0 c1 c2 + r0 12 2 3 + r1 0 70 1 + r2 10 70 30 + + Set value for an entire row + + >>> df.loc['r0'] = 70 + >>> df + c0 c1 c2 + r0 70 70 70 + r1 0 70 1 + r2 10 70 30 + + Set value for an entire column + + >>> df.loc[:, 'c0'] = 30 + >>> df + c0 c1 c2 + r0 30 70 70 + r1 30 70 1 + r2 30 70 30 + + Set value for rows matching callable condition + + >>> df.loc[df['c2'] < 
10] = 0 + >>> df + c0 c1 c2 + r0 30 70 70 + r1 0 0 0 + r2 30 70 30 + Another example using integers for the index >>> df = pd.DataFrame([[12, 2, 3], [0, 4, 1], [10, 20, 30]], From c28a79605069f09d082eebacf46c410b861599a2 Mon Sep 17 00:00:00 2001 From: Aaron Kosel Date: Sun, 11 Mar 2018 18:12:46 -0500 Subject: [PATCH 5/8] Update See Also. Add more examples and specifies return type --- pandas/core/indexing.py | 92 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 86 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 8370c19123def..47697763d2769 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1438,10 +1438,10 @@ class _LocIndexer(_LocationIndexer): -------- DateFrame.at Access a single value for a row/column label pair - DateFrame.iat - Access a single value for a row/column pair by integer position DateFrame.iloc Access group of rows and columns by integer position(s) + Series.loc + Access group of values using labels Examples -------- @@ -1453,8 +1453,7 @@ class _LocIndexer(_LocationIndexer): r1 0 4 1 r2 10 20 30 - Single label for row (note it would be faster to use ``DateFrame.at`` in - this case) + Single label. Note this returns a Series. >>> df.loc['r1'] c0 0 @@ -1462,8 +1461,13 @@ class _LocIndexer(_LocationIndexer): c2 1 Name: r1, dtype: int64 - Single label for row and column (note it would be faster to use - ``DateFrame.at`` in this case) + List with a single label. Note using ``[[]]`` returns a DataFrame. + + >>> df.loc[['r1']] + c0 c1 c2 + r1 0 4 1 + + Single label for row and column >>> df.loc['r0', 'c1'] 2 @@ -1557,6 +1561,82 @@ class _LocIndexer(_LocationIndexer): 8 0 4 1 9 10 20 30 + A number of examples using a DataFrame with a multi-index + + >>> tuples = [('r0', 'bar'), ('r0', 'foo'), ('r1', 'bar'), + ... ('r1', 'foo'), ('r2', 'bar'), ('r2', 'baz')] + >>> index = pd.MultiIndex.from_tuples(tuples) + >>> values = [[12,2,3], [0,4,1], [10,20,30], + ... 
[1, 4, 1], [7, 1, 2], [16, 36, 40]] + >>> df = pd.DataFrame(values, columns=['c0', 'c1', 'c2'], index=index) + >>> df + c0 c1 c2 + r0 bar 12 2 3 + foo 0 4 1 + r1 bar 10 20 30 + foo 1 4 1 + r2 bar 7 1 2 + baz 16 36 40 + + Single label. Note this returns a DataFrame with a single index. + + >>> df.loc['r0'] + c0 c1 c2 + bar 12 2 3 + foo 0 4 1 + + Single index tuple. Note this returns a Series. + + >>> df.loc[('r0', 'bar')] + c0 12 + c1 2 + c2 3 + Name: (r0, bar), dtype: int64 + + Single label for row and column. Similar to passing in a tuple, this + returns a Series. + + >>> df.loc['r0', 'foo'] + c0 0 + c1 4 + c2 1 + Name: (r0, foo), dtype: int64 + + Single tuple. Note using ``[[]]`` returns a DataFrame. + + >>> df.loc[[('r0', 'bar')]] + c0 c1 c2 + r0 bar 12 2 3 + + Single tuple for the index with a single label for the column + + >>> df.loc[('r0', 'foo'), 'c1'] + 4 + + Boolean list + + >>> df.loc[[True, False, True, False, True, True]] + c0 c1 c2 + r0 bar 12 2 3 + r1 bar 10 20 30 + r2 bar 7 1 2 + baz 16 36 40 + + Slice from index tuple to single label + + >>> df.loc[('r0', 'foo'):'r1'] + c0 c1 c2 + r0 foo 0 4 1 + r1 bar 10 20 30 + foo 1 4 1 + + Slice from index tuple to index tuple + + >>> df.loc[('r0', 'foo'):('r1', 'bar')] + c0 c1 c2 + r0 foo 0 4 1 + r1 bar 10 20 30 + Raises ------ KeyError: From 64c698bf91bdc47a529de2e607760f9bbfe462c4 Mon Sep 17 00:00:00 2001 From: Aaron Kosel Date: Mon, 12 Mar 2018 23:53:50 -0500 Subject: [PATCH 6/8] Better labeling of subsections within docs --- pandas/core/indexing.py | 105 ++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 47 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 47697763d2769..a80fda228a880 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1436,36 +1436,35 @@ class _LocIndexer(_LocationIndexer): See Also -------- - DateFrame.at - Access a single value for a row/column label pair - DateFrame.iloc - Access group of rows and columns by 
integer position(s) - Series.loc - Access group of values using labels + DateFrame.at : Access a single value for a row/column label pair + DateFrame.iloc : Access group of rows and columns by integer position(s) + Series.loc : Access group of values using labels Examples -------- - >>> df = pd.DataFrame([[12, 2, 3], [0, 4, 1], [10, 20, 30]], + **Getting values** + + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], ... index=['r0', 'r1', 'r2'], columns=['c0', 'c1', 'c2']) >>> df c0 c1 c2 - r0 12 2 3 - r1 0 4 1 - r2 10 20 30 + r0 1 2 3 + r1 4 5 6 + r2 7 8 9 - Single label. Note this returns a Series. + Single label. Note this returns the row as a Series. >>> df.loc['r1'] - c0 0 - c1 4 - c2 1 + c0 4 + c1 5 + c2 6 Name: r1, dtype: int64 List with a single label. Note using ``[[]]`` returns a DataFrame. >>> df.loc[['r1']] c0 c1 c2 - r1 0 4 1 + r1 4 5 6 Single label for row and column @@ -1476,92 +1475,104 @@ class _LocIndexer(_LocationIndexer): >>> df.loc[['r1', 'r2']] c0 c1 c2 - r1 0 4 1 - r2 10 20 30 + r1 4 5 6 + r2 7 8 9 Slice with labels for row and single label for column. Note that contrary to usual python slices, both the start and the stop are included! 
>>> df.loc['r0':'r1', 'c0'] - r0 12 - r1 0 + r0 1 + r1 4 Name: c0, dtype: int64 Boolean list with the same length as the row axis >>> df.loc[[False, False, True]] c0 c1 c2 - r2 10 20 30 + r2 7 8 9 - Callable that returns valid output for indexing + Conditional that returns a boolean Series - >>> df.loc[df['c1'] > 10] + >>> df.loc[df['c1'] > 6] c0 c1 c2 - r2 10 20 30 + r2 7 8 9 - Callable that returns valid output with column labels specified + Conditional that returns a boolean Series with column labels specified - >>> df.loc[df['c1'] > 10, ['c0', 'c2']] + >>> df.loc[df['c1'] > 6, ['c0', 'c2']] c0 c2 - r2 10 30 + r2 7 9 + + Callable that returns a boolean Series + + >>> df.loc[lambda df: df['c1'] == 8] + c0 c1 c2 + r2 7 8 9 + + **Setting values** Set value for all items matching the list of labels - >>> df.loc[['r1', 'r2'], ['c1']] = 70 + >>> df.loc[['r1', 'r2'], ['c1']] = 50 >>> df c0 c1 c2 - r0 12 2 3 - r1 0 70 1 - r2 10 70 30 + r0 1 2 3 + r1 4 50 6 + r2 7 50 9 Set value for an entire row - >>> df.loc['r0'] = 70 + >>> df.loc['r0'] = 10 >>> df c0 c1 c2 - r0 70 70 70 - r1 0 70 1 - r2 10 70 30 + r0 10 10 10 + r1 4 50 6 + r2 7 50 9 Set value for an entire column >>> df.loc[:, 'c0'] = 30 >>> df c0 c1 c2 - r0 30 70 70 - r1 30 70 1 - r2 30 70 30 + r0 30 10 10 + r1 30 50 6 + r2 30 50 9 Set value for rows matching callable condition >>> df.loc[df['c2'] < 10] = 0 >>> df c0 c1 c2 - r0 30 70 70 + r0 30 10 10 r1 0 0 0 - r2 30 70 30 + r2 0 0 0 + + **Getting values on a DataFrame with an index that has integer labels** Another example using integers for the index - >>> df = pd.DataFrame([[12, 2, 3], [0, 4, 1], [10, 20, 30]], + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], ... index=[7, 8, 9], columns=['c0', 'c1', 'c2']) >>> df c0 c1 c2 - 7 12 2 3 - 8 0 4 1 - 9 10 20 30 + 7 1 2 3 + 8 4 5 6 + 9 7 8 9 Slice with integer labels for rows. Note that contrary to usual python slices, both the start and the stop are included! 
>>> df.loc[7:9] c0 c1 c2 - 7 12 2 3 - 8 0 4 1 - 9 10 20 30 + 7 1 2 3 + 8 4 5 6 + 9 7 8 9 + + **Getting values with a MultiIndex** - A number of examples using a DataFrame with a multi-index + A number of examples using a DataFrame with a MultiIndex >>> tuples = [('r0', 'bar'), ('r0', 'foo'), ('r1', 'bar'), ... ('r1', 'foo'), ('r2', 'bar'), ('r2', 'baz')] From 0902b363ee5555e988efa33922a4814699ad947a Mon Sep 17 00:00:00 2001 From: Aaron Kosel Date: Tue, 13 Mar 2018 23:59:44 -0500 Subject: [PATCH 7/8] Update based on feedback --- pandas/core/indexing.py | 246 ++++++++++++++++++++-------------------- 1 file changed, 121 insertions(+), 125 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a80fda228a880..87e6231d5aa68 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1425,8 +1425,11 @@ class _LocIndexer(_LocationIndexer): interpreted as a *label* of the index, and **never** as an integer position along the index). - A list or array of labels, e.g. ``['a', 'b', 'c']``. - - A slice object with labels, e.g. ``'a':'f'`` (note that contrary - to usual python slices, **both** the start and the stop are included!). + - A slice object with labels, e.g. ``'a':'f'``. + + .. warning:: Note that contrary to usual python slices, **both** the start + and the stop are included + - A boolean array of the same length as the axis being sliced, e.g. ``[True, False, True]``. - A ``callable`` function with one argument (the calling Series, DataFrame @@ -1438,220 +1441,213 @@ class _LocIndexer(_LocationIndexer): -------- DateFrame.at : Access a single value for a row/column label pair DateFrame.iloc : Access group of rows and columns by integer position(s) + DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the + Series/DataFrame. Series.loc : Access group of values using labels Examples -------- **Getting values** - >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], - ... 
index=['r0', 'r1', 'r2'], columns=['c0', 'c1', 'c2']) + >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], + ... index=['cobra', 'viper', 'sidewinder'], + ... columns=['max_speed', 'shield']) >>> df - c0 c1 c2 - r0 1 2 3 - r1 4 5 6 - r2 7 8 9 + max_speed shield + cobra 1 2 + viper 4 5 + sidewinder 7 8 Single label. Note this returns the row as a Series. - >>> df.loc['r1'] - c0 4 - c1 5 - c2 6 - Name: r1, dtype: int64 + >>> df.loc['viper'] + max_speed 4 + shield 5 + Name: viper, dtype: int64 - List with a single label. Note using ``[[]]`` returns a DataFrame. + List of labels. Note using ``[[]]`` returns a DataFrame. - >>> df.loc[['r1']] - c0 c1 c2 - r1 4 5 6 + >>> df.loc[['viper', 'sidewinder']] + max_speed shield + viper 4 5 + sidewinder 7 8 Single label for row and column - >>> df.loc['r0', 'c1'] + >>> df.loc['cobra', 'shield'] 2 - A list of labels - - >>> df.loc[['r1', 'r2']] - c0 c1 c2 - r1 4 5 6 - r2 7 8 9 - - Slice with labels for row and single label for column. Note that - contrary to usual python slices, both the start and the stop are - included! + Slice with labels for row and single label for column. As mentioned + above, note that both the start and stop of the slice are included. 
- >>> df.loc['r0':'r1', 'c0'] - r0 1 - r1 4 - Name: c0, dtype: int64 + >>> df.loc['cobra':'viper', 'max_speed'] + cobra 1 + viper 4 + Name: max_speed, dtype: int64 Boolean list with the same length as the row axis >>> df.loc[[False, False, True]] - c0 c1 c2 - r2 7 8 9 + max_speed shield + sidewinder 7 8 Conditional that returns a boolean Series - >>> df.loc[df['c1'] > 6] - c0 c1 c2 - r2 7 8 9 + >>> df.loc[df['shield'] > 6] + max_speed shield + sidewinder 7 8 Conditional that returns a boolean Series with column labels specified - >>> df.loc[df['c1'] > 6, ['c0', 'c2']] - c0 c2 - r2 7 9 + >>> df.loc[df['shield'] > 6, ['max_speed']] + max_speed + sidewinder 7 Callable that returns a boolean Series - >>> df.loc[lambda df: df['c1'] == 8] - c0 c1 c2 - r2 7 8 9 + >>> df.loc[lambda df: df['shield'] == 8] + max_speed shield + sidewinder 7 8 **Setting values** Set value for all items matching the list of labels - >>> df.loc[['r1', 'r2'], ['c1']] = 50 + >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50 >>> df - c0 c1 c2 - r0 1 2 3 - r1 4 50 6 - r2 7 50 9 + max_speed shield + cobra 1 2 + viper 4 50 + sidewinder 7 50 Set value for an entire row - >>> df.loc['r0'] = 10 + >>> df.loc['cobra'] = 10 >>> df - c0 c1 c2 - r0 10 10 10 - r1 4 50 6 - r2 7 50 9 + max_speed shield + cobra 10 10 + viper 4 50 + sidewinder 7 50 Set value for an entire column - >>> df.loc[:, 'c0'] = 30 + >>> df.loc[:, 'max_speed'] = 30 >>> df - c0 c1 c2 - r0 30 10 10 - r1 30 50 6 - r2 30 50 9 + max_speed shield + cobra 30 10 + viper 30 50 + sidewinder 30 50 Set value for rows matching callable condition - >>> df.loc[df['c2'] < 10] = 0 + >>> df.loc[df['shield'] > 35] = 0 >>> df - c0 c1 c2 - r0 30 10 10 - r1 0 0 0 - r2 0 0 0 + max_speed shield + cobra 30 10 + viper 0 0 + sidewinder 0 0 **Getting values on a DataFrame with an index that has integer labels** Another example using integers for the index - >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], - ... 
index=[7, 8, 9], columns=['c0', 'c1', 'c2']) + >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], + ... index=[7, 8, 9], columns=['max_speed', 'shield']) >>> df - c0 c1 c2 - 7 1 2 3 - 8 4 5 6 - 9 7 8 9 + max_speed shield + 7 1 2 + 8 4 5 + 9 7 8 - Slice with integer labels for rows. Note that contrary to usual - python slices, both the start and the stop are included! + Slice with integer labels for rows. As mentioned above, note that both + the start and stop of the slice are included. >>> df.loc[7:9] - c0 c1 c2 - 7 1 2 3 - 8 4 5 6 - 9 7 8 9 + max_speed shield + 7 1 2 + 8 4 5 + 9 7 8 **Getting values with a MultiIndex** A number of examples using a DataFrame with a MultiIndex - >>> tuples = [('r0', 'bar'), ('r0', 'foo'), ('r1', 'bar'), - ... ('r1', 'foo'), ('r2', 'bar'), ('r2', 'baz')] + >>> tuples = [ + ... ('cobra', 'mark i'), ('cobra', 'mark ii'), + ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'), + ... ('viper', 'mark ii'), ('viper', 'mark iii') + ... ] >>> index = pd.MultiIndex.from_tuples(tuples) - >>> values = [[12,2,3], [0,4,1], [10,20,30], - ... [1, 4, 1], [7, 1, 2], [16, 36, 40]] - >>> df = pd.DataFrame(values, columns=['c0', 'c1', 'c2'], index=index) + >>> values = [[12, 2], [0, 4], [10, 20], + ... [1, 4], [7, 1], [16, 36]] + >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index) >>> df - c0 c1 c2 - r0 bar 12 2 3 - foo 0 4 1 - r1 bar 10 20 30 - foo 1 4 1 - r2 bar 7 1 2 - baz 16 36 40 + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + mark iii 16 36 Single label. Note this returns a DataFrame with a single index. - >>> df.loc['r0'] - c0 c1 c2 - bar 12 2 3 - foo 0 4 1 + >>> df.loc['cobra'] + max_speed shield + mark i 12 2 + mark ii 0 4 Single index tuple. Note this returns a Series. 
- >>> df.loc[('r0', 'bar')] - c0 12 - c1 2 - c2 3 - Name: (r0, bar), dtype: int64 + >>> df.loc[('cobra', 'mark ii')] + max_speed 0 + shield 4 + Name: (cobra, mark ii), dtype: int64 Single label for row and column. Similar to passing in a tuple, this returns a Series. - >>> df.loc['r0', 'foo'] - c0 0 - c1 4 - c2 1 - Name: (r0, foo), dtype: int64 + >>> df.loc['cobra', 'mark i'] + max_speed 12 + shield 2 + Name: (cobra, mark i), dtype: int64 Single tuple. Note using ``[[]]`` returns a DataFrame. - >>> df.loc[[('r0', 'bar')]] - c0 c1 c2 - r0 bar 12 2 3 + >>> df.loc[[('cobra', 'mark ii')]] + max_speed shield + cobra mark ii 0 4 Single tuple for the index with a single label for the column - >>> df.loc[('r0', 'foo'), 'c1'] - 4 - - Boolean list - - >>> df.loc[[True, False, True, False, True, True]] - c0 c1 c2 - r0 bar 12 2 3 - r1 bar 10 20 30 - r2 bar 7 1 2 - baz 16 36 40 + >>> df.loc[('cobra', 'mark i'), 'shield'] + 2 Slice from index tuple to single label - >>> df.loc[('r0', 'foo'):'r1'] - c0 c1 c2 - r0 foo 0 4 1 - r1 bar 10 20 30 - foo 1 4 1 + >>> df.loc[('cobra', 'mark i'):'viper'] + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + mark iii 16 36 Slice from index tuple to index tuple - >>> df.loc[('r0', 'foo'):('r1', 'bar')] - c0 c1 c2 - r0 foo 0 4 1 - r1 bar 10 20 30 + >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')] + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 Raises ------ KeyError: - when items are not found + when any items are not found """ _valid_types = ("labels (MUST BE IN THE INDEX), slices of labels (BOTH " From a23a8e91e2cafeedaa036005b26f6fbdc74e9139 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 14 Mar 2018 13:51:35 +0100 Subject: [PATCH 8/8] rst formatting --- pandas/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py 
index 87e6231d5aa68..e78b2bcf28d25 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1427,8 +1427,8 @@ class _LocIndexer(_LocationIndexer): - A list or array of labels, e.g. ``['a', 'b', 'c']``. - A slice object with labels, e.g. ``'a':'f'``. - .. warning:: Note that contrary to usual python slices, **both** the start - and the stop are included + .. warning:: Note that contrary to usual python slices, **both** the + start and the stop are included - A boolean array of the same length as the axis being sliced, e.g. ``[True, False, True]``.