From 3e6acc895d97cbe3efb04380109b5c0f7ef156ec Mon Sep 17 00:00:00 2001
From: marcosrullan
Date: Sat, 10 Mar 2018 14:51:38 +0100
Subject: [PATCH 1/4] DOC: Added examples to the IndexOpsmixin.value_counts() docstring

---
 pandas/core/base.py | 52 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 48 insertions(+), 4 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 280b8849792e3..0cd81d2fc0cfe 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -931,7 +931,7 @@ def map_f(values, f):
     def value_counts(self, normalize=False, sort=True, ascending=False,
                      bins=None, dropna=True):
         """
-        Returns object containing counts of unique values.
+        Return object containing counts of unique values.
 
         The resulting object will be in descending order so that the
         first element is the most frequently-occurring element.
@@ -943,18 +943,62 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
             If True then the object returned will contain the relative
             frequencies of the unique values.
         sort : boolean, default True
-            Sort by values
+            Sort by values.
         ascending : boolean, default False
-            Sort in ascending order
+            Sort in ascending order.
         bins : integer, optional
             Rather than count values, group them into half-open bins,
-            a convenience for pd.cut, only works with numeric data
+            a convenience for pd.cut, only works with numeric data.
         dropna : boolean, default True
             Don't include counts of NaN.
 
         Returns
         -------
         counts : Series
+
+        Examples
+        --------
+
+        >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
+        >>> index.value_counts()
+        3.0    2
+        4.0    1
+        2.0    1
+        1.0    1
+        dtype: int64
+
+        With `normalize` set to `True`, returns the relative frequency by dividing all values by the sum of values.
+
+        >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
+        >>> s.value_counts(normalize=True)
+        3.0    0.4
+        4.0    0.2
+        2.0    0.2
+        1.0    0.2
+        dtype: float64
+
+        **bins**
+
+        Bins can be useful for going from a continuous variable to a categorical variable; instead of counting unique
+        apparitions of values, divide the index in the specified number of half-open bins.
+
+        >>> s.value_counts(bins=3)
+        (2.0, 3.0]      2
+        (0.996, 2.0]    2
+        (3.0, 4.0]      1
+        dtype: int64
+
+        **dropna**
+
+        With `dropna` set to `False` we can also see NaN index values.
+
+        >>> s.value_counts(dropna=False)
+        3.0    2
+        NaN 1
+        4.0    1
+        2.0    1
+        1.0    1
+        dtype: int64
         """
         from pandas.core.algorithms import value_counts
         result = value_counts(self, sort=sort, ascending=ascending,

From 8307b88881315bf260986d75a0b953d59fa3a216 Mon Sep 17 00:00:00 2001
From: marcosrullan
Date: Sat, 10 Mar 2018 14:59:46 +0100
Subject: [PATCH 2/4] DOC: Added examples to the IndexOpsmixin.value_counts() docstring

---
 pandas/core/base.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 0cd81d2fc0cfe..b792660b3813b 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -967,7 +967,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
         1.0    1
         dtype: int64
 
-        With `normalize` set to `True`, returns the relative frequency by dividing all values by the sum of values.
+        With `normalize` set to `True`, returns the relative frequency by
+        dividing all values by the sum of values.
 
         >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
         >>> s.value_counts(normalize=True)
@@ -979,8 +980,10 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
 
         **bins**
 
-        Bins can be useful for going from a continuous variable to a categorical variable; instead of counting unique
-        apparitions of values, divide the index in the specified number of half-open bins.
+        Bins can be useful for going from a continuous variable to a
+        categorical variable; instead of counting unique
+        apparitions of values, divide the index in the specified
+        number of half-open bins.
 
         >>> s.value_counts(bins=3)
         (2.0, 3.0]      2

From 3eab1129261f4299b186fe129e896cf162a58cb2 Mon Sep 17 00:00:00 2001
From: marcosrullan
Date: Sat, 10 Mar 2018 15:30:59 +0100
Subject: [PATCH 3/4] DOC: remove blank after section

---
 pandas/core/base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index b792660b3813b..4537fcddbffe3 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -958,7 +958,6 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
 
         Examples
         --------
-
         >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
         >>> index.value_counts()
         3.0    2

From 17e4e4ee9e667f8f743537b28b0b6695756dcf6a Mon Sep 17 00:00:00 2001
From: Matt Roeschke
Date: Sat, 7 Jul 2018 10:37:09 -0500
Subject: [PATCH 4/4] small cleanup

---
 pandas/core/base.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 6ffc4651c4358..4ea8c43b640fb 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1008,7 +1008,7 @@ def map_f(values, f):
     def value_counts(self, normalize=False, sort=True, ascending=False,
                      bins=None, dropna=True):
         """
-        Return object containing counts of unique values.
+        Return a Series containing counts of unique values.
 
         The resulting object will be in descending order so that the
         first element is the most frequently-occurring element.
@@ -1025,7 +1025,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
             Sort in ascending order.
         bins : integer, optional
             Rather than count values, group them into half-open bins,
-            a convenience for pd.cut, only works with numeric data.
+            a convenience for ``pd.cut``, only works with numeric data.
         dropna : boolean, default True
             Don't include counts of NaN.
 
@@ -1033,6 +1033,11 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
         -------
         counts : Series
 
+        See Also
+        --------
+        Series.count: number of non-NA elements in a Series
+        DataFrame.count: number of non-NA elements in a DataFrame
+
         Examples
         --------
         >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
@@ -1073,7 +1078,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
 
         >>> s.value_counts(dropna=False)
         3.0    2
-        NaN 1
+        NaN    1
         4.0    1
         2.0    1
         1.0    1
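
A quick way to sanity-check the doctest examples these patches add to the value_counts docstring (a minimal sketch, assuming numpy and pandas are importable; exact bin edges such as 0.996 and the ordering of ties can differ between pandas versions):

    import numpy as np
    import pandas as pd

    s = pd.Series([3, 1, 2, 3, 4, np.nan])

    # Default: counts of unique values, most frequent first; NaN is excluded.
    print(s.value_counts())

    # normalize=True: relative frequencies, i.e. each count divided by the total.
    print(s.value_counts(normalize=True))

    # bins=3: group the numeric values into three half-open bins (via pd.cut)
    # instead of counting each distinct value.
    print(s.value_counts(bins=3))

    # dropna=False: keep the NaN entry in the result.
    print(s.value_counts(dropna=False))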