diff --git a/pandas/core/base.py b/pandas/core/base.py index 6625a3bbe97d7..4ea8c43b640fb 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1008,7 +1008,7 @@ def map_f(values, f): def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): """ - Returns object containing counts of unique values. + Return a Series containing counts of unique values. The resulting object will be in descending order so that the first element is the most frequently-occurring element. @@ -1020,18 +1020,69 @@ def value_counts(self, normalize=False, sort=True, ascending=False, If True then the object returned will contain the relative frequencies of the unique values. sort : boolean, default True - Sort by values + Sort by values. ascending : boolean, default False - Sort in ascending order + Sort in ascending order. bins : integer, optional Rather than count values, group them into half-open bins, - a convenience for pd.cut, only works with numeric data + a convenience for ``pd.cut``, only works with numeric data. dropna : boolean, default True Don't include counts of NaN. Returns ------- counts : Series + + See Also + -------- + Series.count: Number of non-NA elements in a Series. + DataFrame.count: Number of non-NA elements in a DataFrame. + + Examples + -------- + >>> index = pd.Index([3, 1, 2, 3, 4, np.nan]) + >>> index.value_counts() + 3.0 2 + 4.0 1 + 2.0 1 + 1.0 1 + dtype: int64 + + With `normalize` set to `True`, returns the relative frequency by + dividing all values by the sum of values. + + >>> s = pd.Series([3, 1, 2, 3, 4, np.nan]) + >>> s.value_counts(normalize=True) + 3.0 0.4 + 4.0 0.2 + 2.0 0.2 + 1.0 0.2 + dtype: float64 + + **bins** + + Bins can be useful for going from a continuous variable to a + categorical variable; instead of counting unique + occurrences of values, divide the index into the specified + number of half-open bins. 
+ + >>> s.value_counts(bins=3) + (2.0, 3.0] 2 + (0.996, 2.0] 2 + (3.0, 4.0] 1 + dtype: int64 + + **dropna** + + With `dropna` set to `False` we can also see NaN index values. + + >>> s.value_counts(dropna=False) + 3.0 2 + NaN 1 + 4.0 1 + 2.0 1 + 1.0 1 + dtype: int64 """ from pandas.core.algorithms import value_counts result = value_counts(self, sort=sort, ascending=ascending,