diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 413923262c6b0..4bb990a57cb4d 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -147,7 +147,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
     return labels, uniques
 
 
-def value_counts(values, sort=True, ascending=False):
+def value_counts(values, sort=True, ascending=False, normalize=False):
     """
     Compute a histogram of the counts of non-null values
 
@@ -158,6 +158,8 @@ def value_counts(values, sort=True, ascending=False):
         Sort by values
     ascending : boolean, default False
         Sort in ascending order
+    normalize : boolean, default False
+        If True then compute a relative histogram of the non-null values
 
     Returns
     -------
@@ -190,6 +192,11 @@ def value_counts(values, sort=True, ascending=False):
         if not ascending:
             result = result[::-1]
 
+    if normalize:
+        # counts exclude nulls, so divide by their total rather than
+        # values.size to make the relative frequencies sum to 1
+        result = result / float(result.sum())
+
     return result
 
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 2870bb1ab05b1..c6fe396b08867 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1379,18 +1379,25 @@ def count(self, level=None):
 
         return notnull(self.values).sum()
 
-    def value_counts(self):
+    def value_counts(self, normalize=False):
         """
         Returns Series containing counts of unique values. The resulting Series
         will be in descending order so that the first element is the most
         frequently-occurring element. Excludes NA values
 
+        Parameters
+        ----------
+        normalize : boolean, default False
+            If True then the Series returned will contain the relative
+            frequencies of the unique (non-NA) values.
+
         Returns
         -------
         counts : Series
         """
         from pandas.core.algorithms import value_counts
-        return value_counts(self.values, sort=True, ascending=False)
+        return value_counts(self.values, sort=True, ascending=False,
+                            normalize=normalize)
 
     def unique(self):
         """
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index cef309fd59503..74b41f4ef1cd7 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -2383,6 +2383,11 @@ def test_value_counts_nunique(self):
         expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
         assert_series_equal(hist, expected)
 
+        # relative histogram.
+        hist = s.value_counts(normalize=True)
+        expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
+        assert_series_equal(hist, expected)
+
         self.assertEquals(s.nunique(), 4)
 
         # handle NA's properly