Skip to content

Commit dd3cd71

Browse files
committed
BUG: handle NAs in Series.value_counts and describe with dtype=object, GH #277
1 parent 1bcf68e commit dd3cd71

File tree

3 files changed

+19
-5
lines changed

3 files changed

+19
-5
lines changed

pandas/core/series.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -604,15 +604,15 @@ def value_counts(self):
604604
"""
605605
Returns Series containing counts of unique values. The resulting Series
606606
will be in descending order so that the first element is the most
607-
frequently-occurring element
607+
frequently-occurring element. Excludes NA values
608608
609609
Returns
610610
-------
611611
counts : Series
612612
"""
613613
from collections import defaultdict
614614
counter = defaultdict(lambda: 0)
615-
for value in self.values:
615+
for value in self.dropna().values:
616616
counter[value] += 1
617617
return Series(counter).order(ascending=False)
618618

@@ -906,7 +906,7 @@ def describe(self):
906906
if self.dtype == object:
907907
names = ['count', 'unique', 'top', 'freq']
908908

909-
objcounts = Counter(self)
909+
objcounts = Counter(self.dropna().values)
910910
top, freq = objcounts.most_common(1)[0]
911911
data = [self.count(), len(objcounts), top, freq]
912912

pandas/tests/test_series.py

+13-1
Original file line number | Diff line number | Diff line change
@@ -555,7 +555,13 @@ def test_quantile(self):
555555
def test_describe(self):
556556
_ = self.series.describe()
557557
_ = self.ts.describe()
558-
_ = self.objSeries.describe()
558+
559+
def test_describe_objects(self):
560+
s = Series(['a', 'b', 'b', np.nan, np.nan, np.nan, 'c', 'd', 'a', 'a'])
561+
result = s.describe()
562+
expected = Series({'count' : 7, 'unique' : 4,
563+
'top' : 'a', 'freq' : 3}, index=result.index)
564+
assert_series_equal(result, expected)
559565

560566
def test_append(self):
561567
appendedSeries = self.series.append(self.ts)
@@ -770,6 +776,12 @@ def test_value_counts(self):
770776
expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
771777
assert_series_equal(hist, expected)
772778

779+
# handle NA's properly
780+
s[5:7] = np.nan
781+
hist = s.value_counts()
782+
expected = s.dropna().value_counts()
783+
assert_series_equal(hist, expected)
784+
773785
s = Series({})
774786
hist = s.value_counts()
775787
expected = Series([])

pandas/util/testing.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
from __future__ import division
2+
13
# pylint: disable-msg=W0402
24

35
from datetime import datetime
@@ -116,7 +118,7 @@ def assert_dict_equal(a, b, compare_keys=True):
116118
assert_almost_equal(a[k], b[k])
117119

118120
def assert_series_equal(left, right):
119-
assert_almost_equal(left, right)
121+
assert_almost_equal(left.values, right.values)
120122
assert(left.dtype == right.dtype)
121123
assert(left.index.equals(right.index))
122124

0 commit comments

Comments
 (0)