Skip to content

Commit c348314

Browse files
author
Nick Eubank
committed
fixes 12558
1 parent e5ed87b commit c348314

File tree

3 files changed

+12
-1
lines changed

3 files changed

+12
-1
lines changed

doc/source/whatsnew/v0.18.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -43,3 +43,4 @@ Performance Improvements
4343

4444
Bug Fixes
4545
~~~~~~~~~
46+
- Bug in ``value_counts`` where ``normalize=True`` divided by the total number of observations, including missing values, even when ``dropna=True`` (:issue:`12558`)

pandas/core/algorithms.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -269,6 +269,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
269269
from pandas.core.series import Series
270270
from pandas.tools.tile import cut
271271
from pandas import Index, PeriodIndex, DatetimeIndex
272+
from pandas.core.common import notnull
272273

273274
name = getattr(values, 'name', None)
274275
values = Series(values).values
@@ -342,7 +343,10 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
342343
result = result.sort_values(ascending=ascending)
343344

344345
if normalize:
345-
result = result / float(values.size)
346+
if dropna:
347+
result = result / float(values[notnull(values)].size)
348+
else:
349+
result = result / float(values.size)
346350

347351
return result
348352

pandas/tests/test_algos.py

+6
Original file line number | Diff line number | Diff line change
@@ -517,6 +517,12 @@ def test_dropna(self):
517517
pd.Series([10.3, 5., 5., None]).value_counts(dropna=False),
518518
pd.Series([2, 1, 1], index=[5., 10.3, np.nan]))
519519

520+
def test_dropna_normalize(self):
521+
# Issue 12558
522+
tm.assert_series_equal(
523+
pd.Series([ 5.,10.3,10.3,10.3,np.nan]).value_counts(dropna=True, normalize=True),
524+
pd.Series([0.75, 0.25], index=[10.3, 5.]))
525+
520526

521527
class GroupVarTestMixin(object):
522528

0 commit comments

Comments
 (0)