Skip to content

Commit 28eb190

Browse files
akosel authored and TomAugspurger committed
DOC: update the DataFrame.loc docstring (#20229)
1 parent c0d93f9 commit 28eb190

File tree

1 file changed

+220
-6
lines changed

1 file changed

+220
-6
lines changed

pandas/core/indexing.py

+220-6
Original file line numberDiff line numberDiff line change
@@ -1413,7 +1413,8 @@ def _get_slice_axis(self, slice_obj, axis=None):
14131413

14141414

14151415
class _LocIndexer(_LocationIndexer):
1416-
"""Purely label-location based indexer for selection by label.
1416+
"""
1417+
Access a group of rows and columns by label(s) or a boolean array.
14171418
14181419
``.loc[]`` is primarily label based, but may also be used with a
14191420
boolean array.
@@ -1424,16 +1425,229 @@ class _LocIndexer(_LocationIndexer):
14241425
interpreted as a *label* of the index, and **never** as an
14251426
integer position along the index).
14261427
- A list or array of labels, e.g. ``['a', 'b', 'c']``.
1427-
- A slice object with labels, e.g. ``'a':'f'`` (note that contrary
1428-
to usual python slices, **both** the start and the stop are included!).
1429-
- A boolean array.
1428+
- A slice object with labels, e.g. ``'a':'f'``.
1429+
1430+
.. warning:: Note that contrary to usual python slices, **both** the
1431+
start and the stop are included.
1432+
1433+
- A boolean array of the same length as the axis being sliced,
1434+
e.g. ``[True, False, True]``.
14301435
- A ``callable`` function with one argument (the calling Series, DataFrame
14311436
or Panel) and that returns valid output for indexing (one of the above)
14321437
1433-
``.loc`` will raise a ``KeyError`` when the items are not found.
1434-
14351438
See more at :ref:`Selection by Label <indexing.label>`
14361439
1440+
See Also
1441+
--------
1442+
DataFrame.at : Access a single value for a row/column label pair
1443+
DataFrame.iloc : Access group of rows and columns by integer position(s)
1444+
DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
1445+
Series/DataFrame.
1446+
Series.loc : Access group of values using labels
1447+
1448+
Examples
1449+
--------
1450+
**Getting values**
1451+
1452+
>>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
1453+
... index=['cobra', 'viper', 'sidewinder'],
1454+
... columns=['max_speed', 'shield'])
1455+
>>> df
1456+
max_speed shield
1457+
cobra 1 2
1458+
viper 4 5
1459+
sidewinder 7 8
1460+
1461+
Single label. Note this returns the row as a Series.
1462+
1463+
>>> df.loc['viper']
1464+
max_speed 4
1465+
shield 5
1466+
Name: viper, dtype: int64
1467+
1468+
List of labels. Note using ``[[]]`` returns a DataFrame.
1469+
1470+
>>> df.loc[['viper', 'sidewinder']]
1471+
max_speed shield
1472+
viper 4 5
1473+
sidewinder 7 8
1474+
1475+
Single label for row and column
1476+
1477+
>>> df.loc['cobra', 'shield']
1478+
2
1479+
1480+
Slice with labels for row and single label for column. As mentioned
1481+
above, note that both the start and stop of the slice are included.
1482+
1483+
>>> df.loc['cobra':'viper', 'max_speed']
1484+
cobra 1
1485+
viper 4
1486+
Name: max_speed, dtype: int64
1487+
1488+
Boolean list with the same length as the row axis
1489+
1490+
>>> df.loc[[False, False, True]]
1491+
max_speed shield
1492+
sidewinder 7 8
1493+
1494+
Conditional that returns a boolean Series
1495+
1496+
>>> df.loc[df['shield'] > 6]
1497+
max_speed shield
1498+
sidewinder 7 8
1499+
1500+
Conditional that returns a boolean Series with column labels specified
1501+
1502+
>>> df.loc[df['shield'] > 6, ['max_speed']]
1503+
max_speed
1504+
sidewinder 7
1505+
1506+
Callable that returns a boolean Series
1507+
1508+
>>> df.loc[lambda df: df['shield'] == 8]
1509+
max_speed shield
1510+
sidewinder 7 8
1511+
1512+
**Setting values**
1513+
1514+
Set value for all items matching the list of labels
1515+
1516+
>>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
1517+
>>> df
1518+
max_speed shield
1519+
cobra 1 2
1520+
viper 4 50
1521+
sidewinder 7 50
1522+
1523+
Set value for an entire row
1524+
1525+
>>> df.loc['cobra'] = 10
1526+
>>> df
1527+
max_speed shield
1528+
cobra 10 10
1529+
viper 4 50
1530+
sidewinder 7 50
1531+
1532+
Set value for an entire column
1533+
1534+
>>> df.loc[:, 'max_speed'] = 30
1535+
>>> df
1536+
max_speed shield
1537+
cobra 30 10
1538+
viper 30 50
1539+
sidewinder 30 50
1540+
1541+
Set value for rows matching a boolean condition
1542+
1543+
>>> df.loc[df['shield'] > 35] = 0
1544+
>>> df
1545+
max_speed shield
1546+
cobra 30 10
1547+
viper 0 0
1548+
sidewinder 0 0
1549+
1550+
**Getting values on a DataFrame with an index that has integer labels**
1551+
1552+
Another example using integers for the index
1553+
1554+
>>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
1555+
... index=[7, 8, 9], columns=['max_speed', 'shield'])
1556+
>>> df
1557+
max_speed shield
1558+
7 1 2
1559+
8 4 5
1560+
9 7 8
1561+
1562+
Slice with integer labels for rows. As mentioned above, note that both
1563+
the start and stop of the slice are included.
1564+
1565+
>>> df.loc[7:9]
1566+
max_speed shield
1567+
7 1 2
1568+
8 4 5
1569+
9 7 8
1570+
1571+
**Getting values with a MultiIndex**
1572+
1573+
A number of examples using a DataFrame with a MultiIndex
1574+
1575+
>>> tuples = [
1576+
... ('cobra', 'mark i'), ('cobra', 'mark ii'),
1577+
... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
1578+
... ('viper', 'mark ii'), ('viper', 'mark iii')
1579+
... ]
1580+
>>> index = pd.MultiIndex.from_tuples(tuples)
1581+
>>> values = [[12, 2], [0, 4], [10, 20],
1582+
... [1, 4], [7, 1], [16, 36]]
1583+
>>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
1584+
>>> df
1585+
max_speed shield
1586+
cobra mark i 12 2
1587+
mark ii 0 4
1588+
sidewinder mark i 10 20
1589+
mark ii 1 4
1590+
viper mark ii 7 1
1591+
mark iii 16 36
1592+
1593+
Single label. Note this returns a DataFrame with a single index.
1594+
1595+
>>> df.loc['cobra']
1596+
max_speed shield
1597+
mark i 12 2
1598+
mark ii 0 4
1599+
1600+
Single index tuple. Note this returns a Series.
1601+
1602+
>>> df.loc[('cobra', 'mark ii')]
1603+
max_speed 0
1604+
shield 4
1605+
Name: (cobra, mark ii), dtype: int64
1606+
1607+
One label per index level. Similar to passing in a tuple, this
1608+
returns a Series.
1609+
1610+
>>> df.loc['cobra', 'mark i']
1611+
max_speed 12
1612+
shield 2
1613+
Name: (cobra, mark i), dtype: int64
1614+
1615+
Single tuple. Note using ``[[]]`` returns a DataFrame.
1616+
1617+
>>> df.loc[[('cobra', 'mark ii')]]
1618+
max_speed shield
1619+
cobra mark ii 0 4
1620+
1621+
Single tuple for the index with a single label for the column
1622+
1623+
>>> df.loc[('cobra', 'mark i'), 'shield']
1624+
2
1625+
1626+
Slice from index tuple to single label
1627+
1628+
>>> df.loc[('cobra', 'mark i'):'viper']
1629+
max_speed shield
1630+
cobra mark i 12 2
1631+
mark ii 0 4
1632+
sidewinder mark i 10 20
1633+
mark ii 1 4
1634+
viper mark ii 7 1
1635+
mark iii 16 36
1636+
1637+
Slice from index tuple to index tuple
1638+
1639+
>>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
1640+
max_speed shield
1641+
cobra mark i 12 2
1642+
mark ii 0 4
1643+
sidewinder mark i 10 20
1644+
mark ii 1 4
1645+
viper mark ii 7 1
1646+
1647+
Raises
1648+
------
1649+
KeyError
1650+
If any items are not found.
14371651
"""
14381652

14391653
_valid_types = ("labels (MUST BE IN THE INDEX), slices of labels (BOTH "

0 commit comments

Comments
 (0)