Skip to content

Commit c8d3ac4

Browse files
committed
extend docs and tests
1 parent fd0f22d commit c8d3ac4

File tree

4 files changed

+66
-2
lines changed

4 files changed

+66
-2
lines changed

pandas/core/frame.py

+12
Original file line numberDiff line numberDiff line change
@@ -4986,6 +4986,18 @@ def nunique(self, axis=0, dropna=True):
49864986
Returns
49874987
-------
49884988
nunique : Series
4989+
4990+
Examples
4991+
--------
4992+
>>> df = DataFrame({'A': [1, 2, 3], 'B': [1, 1, 1]})
4993+
>>> df.nunique()
4994+
A 3
4995+
B 1
4996+
4997+
>>> df.nunique(axis=1)
4998+
0 1
4999+
1 2
5000+
2 2
49895001
"""
49905002
func = functools.partial(Series.nunique, dropna=dropna)
49915003
return self.apply(func, axis=axis)

pandas/core/groupby.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -3901,14 +3901,47 @@ def count(self):
39013901

39023902
def nunique(self, dropna=True):
39033903
"""
3904-
Return Series with number of distinct observations per group.
3904+
Return DataFrame with number of distinct observations per group for
3905+
each column.
39053906
39063907
.. versionadded:: 0.20.0
39073908
39083909
Parameters
39093910
----------
39103911
dropna : boolean, default True
39113912
Don't include NaN in the counts.
3913+
3914+
Returns
3915+
-------
3916+
nunique : DataFrame
3917+
3918+
Examples
3919+
--------
3920+
>>> df = DataFrame({'id': ['spam', 'egg', 'egg', 'spam', 'ham', 'ham'],
3921+
... 'value1': [1, 5, 5, 2, 5, 5],
3922+
... 'value2': list('abbaxy')})
3923+
>>> df
3924+
id value1 value2
3925+
0 spam 1 a
3926+
1 egg 5 b
3927+
2 egg 5 b
3928+
3 spam 2 a
3929+
4 ham 5 x
3930+
5 ham 5 y
3931+
3932+
>>> df.groupby('id').nunique()
3933+
id value1 value2
3934+
id
3935+
egg 1 1 1
3936+
ham 1 1 2
3937+
spam 1 2 1
3938+
3939+
>>> df.groupby('id').filter(lambda g: (g.nunique() > 1).any())
3940+
id value1 value2
3941+
0 spam 1 a
3942+
3 spam 2 a
3943+
4 ham 5 x
3944+
5 ham 5 y
39123945
"""
39133946
from functools import partial
39143947
func = partial(Series.nunique, dropna=dropna)

pandas/tests/frame/test_analytics.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
MultiIndex, date_range, Timestamp)
1717
import pandas as pd
1818
import pandas.core.nanops as nanops
19+
import pandas.core.algorithms as algorithms
1920
import pandas.formats.printing as printing
2021

2122
import pandas.util.testing as tm
@@ -411,7 +412,7 @@ def test_count(self):
411412
tm.assert_series_equal(result, expected)
412413

413414
def test_nunique(self):
414-
f = lambda s: len(nanops.unique1d(s.dropna()))
415+
f = lambda s: len(algorithms.unique1d(s.dropna()))
415416
self._check_stat_op('nunique', f, has_skipna=False,
416417
check_dtype=False, check_dates=True)
417418

pandas/tests/groupby/test_groupby.py

+18
Original file line numberDiff line numberDiff line change
@@ -2806,10 +2806,28 @@ def test_nunique(self):
28062806
'B': list('abxacc'),
28072807
'C': list('abbacx'),
28082808
})
2809+
28092810
expected = DataFrame({'A': [1] * 3, 'B': [1, 2, 1], 'C': [1, 1, 2]})
28102811
result = df.groupby('A', as_index=False).nunique()
28112812
tm.assert_frame_equal(result, expected)
28122813

2814+
# as_index
2815+
expected.index = list('abc')
2816+
expected.index.name = 'A'
2817+
result = df.groupby('A').nunique()
2818+
tm.assert_frame_equal(result, expected)
2819+
2820+
# with na
2821+
result = df.replace({'x': None}).groupby('A').nunique(dropna=False)
2822+
tm.assert_frame_equal(result, expected)
2823+
2824+
# dropna
2825+
expected = DataFrame({'A': [1] * 3, 'B': [1] * 3, 'C': [1] * 3},
2826+
index=list('abc'))
2827+
expected.index.name = 'A'
2828+
result = df.replace({'x': None}).groupby('A').nunique()
2829+
tm.assert_frame_equal(result, expected)
2830+
28132831
def test_non_cython_api(self):
28142832

28152833
# GH5610

0 commit comments

Comments
 (0)