@@ -7303,38 +7303,82 @@ def _get_agg_axis(self, axis_num):
7303
7303
7304
7304
def mode (self , axis = 0 , numeric_only = False , dropna = True ):
7305
7305
"""
7306
- Gets the mode(s) of each element along the axis selected. Adds a row
7307
- for each mode per label, fills in gaps with nan.
7306
+ Get the mode(s) of each element along the selected axis.
7308
7307
7309
- Note that there could be multiple values returned for the selected
7310
- axis (when more than one item share the maximum frequency), which is
7311
- the reason why a dataframe is returned. If you want to impute missing
7312
- values with the mode in a dataframe ``df``, you can just do this:
7313
- ``df.fillna(df.mode().iloc[0])``
7308
+ The mode of a set of values is the value that appears most often.
7309
+ Several values can tie, so there may be more than one mode.
7314
7310
7315
7311
Parameters
7316
7312
----------
7317
7313
axis : {0 or 'index', 1 or 'columns'}, default 0
7314
+ The axis to iterate over while searching for the mode:
7315
+
7318
7316
* 0 or 'index' : get mode of each column
7319
7317
* 1 or 'columns' : get mode of each row
7320
- numeric_only : boolean , default False
7321
- if True, only apply to numeric columns
7322
- dropna : boolean , default True
7318
+ numeric_only : bool , default False
7319
+ If True, only apply to numeric columns.
7320
+ dropna : bool , default True
7323
7321
Don't consider counts of NaN/NaT.
7324
7322
7325
7323
.. versionadded:: 0.24.0
7326
7324
7327
7325
Returns
7328
7326
-------
7329
- modes : DataFrame (sorted)
7327
+ DataFrame
7328
+ The modes of each column or row.
7329
+
7330
+ See Also
7331
+ --------
7332
+ Series.mode : Return the highest frequency value in a Series.
7333
+ Series.value_counts : Return the counts of values in a Series.
7330
7334
7331
7335
Examples
7332
7336
--------
7333
- >>> df = pd.DataFrame({'A': [1, 2, 1, 2, 1, 2, 3]})
7337
+ >>> df = pd.DataFrame([('bird', 2, 2),
7338
+ ... ('mammal', 4, np.nan),
7339
+ ... ('arthropod', 8, 0),
7340
+ ... ('bird', 2, np.nan)],
7341
+ ... index=('falcon', 'horse', 'spider', 'ostrich'),
7342
+ ... columns=('species', 'legs', 'wings'))
7343
+ >>> df
7344
+ species legs wings
7345
+ falcon bird 2 2.0
7346
+ horse mammal 4 NaN
7347
+ spider arthropod 8 0.0
7348
+ ostrich bird 2 NaN
7349
+
7350
+ By default, missing values are not considered, and the modes of wings
7351
+ are both 0 and 2. The second row of species and legs contains ``NaN``,
7352
+ because they have only one mode, but the DataFrame has two rows.
7353
+
7334
7354
>>> df.mode()
7335
- A
7336
- 0 1
7337
- 1 2
7355
+ species legs wings
7356
+ 0 bird 2.0 0.0
7357
+ 1 NaN NaN 2.0
7358
+
7359
+ Setting ``dropna=False``, ``NaN`` values are considered and they can be
7360
+ the mode (like for wings).
7361
+
7362
+ >>> df.mode(dropna=False)
7363
+ species legs wings
7364
+ 0 bird 2 NaN
7365
+
7366
+ Setting ``numeric_only=True``, only the mode of numeric columns is
7367
+ computed, and columns of other types are ignored.
7368
+
7369
+ >>> df.mode(numeric_only=True)
7370
+ legs wings
7371
+ 0 2.0 0.0
7372
+ 1 NaN 2.0
7373
+
7374
+ To compute the mode over columns and not rows, use the axis parameter:
7375
+
7376
+ >>> df.mode(axis='columns', numeric_only=True)
7377
+ 0 1
7378
+ falcon 2.0 NaN
7379
+ horse 4.0 NaN
7380
+ spider 0.0 8.0
7381
+ ostrich 2.0 NaN
7338
7382
"""
7339
7383
data = self if not numeric_only else self ._get_numeric_data ()
7340
7384
0 commit comments