Skip to content

Commit 657d255

Browse files
committed
Merge pull request #6734 from sinhrks/ind_nunique
ENH: added nunique function to Index
2 parents fe9aa12 + 91befdd commit 657d255

File tree

10 files changed

+294
-163
lines changed

10 files changed

+294
-163
lines changed

doc/source/api.rst

+4-2
Original file line numberDiff line numberDiff line change
@@ -348,16 +348,16 @@ Computations / Descriptive Stats
348348
Series.median
349349
Series.min
350350
Series.mode
351-
Series.nunique
352351
Series.pct_change
353352
Series.prod
354353
Series.quantile
355354
Series.rank
356355
Series.skew
357356
Series.std
358357
Series.sum
359-
Series.unique
360358
Series.var
359+
Series.unique
360+
Series.nunique
361361
Series.value_counts
362362

363363
Reindexing / Selection / Label manipulation
@@ -1053,6 +1053,8 @@ Modifying and Computations
10531053
Index.repeat
10541054
Index.set_names
10551055
Index.unique
1056+
Index.nunique
1057+
Index.value_counts
10561058

10571059
Conversion
10581060
~~~~~~~~~~

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ API Changes
159159
- Arithmetic ops are now disallowed when passed two bool dtype Series or
160160
DataFrames (:issue:`6762`).
161161

162+
- Added ``nunique`` and ``value_counts`` methods to ``Index`` for counting unique elements and their occurrences. (:issue:`6734`)
163+
162164
Deprecations
163165
~~~~~~~~~~~~
164166

doc/source/v0.14.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ API changes
199199
- ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`)
200200
- ``Panel.shift`` now uses ``NDFrame.shift``. It no longer drops the ``nan`` data and retains its original shape. (:issue:`4867`)
201201

202+
- Added ``nunique`` and ``value_counts`` methods to ``Index`` for counting unique elements and their occurrences. (:issue:`6734`)
202203

203204
MultiIndexing Using Slicers
204205
~~~~~~~~~~~~~~~~~~~~~~~~~~~

pandas/core/base.py

+50
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,56 @@ def min(self):
269269
self._is_allowed_index_op('min')
270270
return self.values.min()
271271

272+
def value_counts(self, normalize=False, sort=True, ascending=False,
273+
bins=None):
274+
"""
275+
Returns object containing counts of unique values. The resulting object
276+
will be in descending order by default, so that the first element is the most
277+
frequently-occurring element. Excludes NA values.
278+
279+
Parameters
280+
----------
281+
normalize : boolean, default False
282+
If True then the object returned will contain the relative
283+
frequencies of the unique values.
284+
sort : boolean, default True
285+
Sort by frequencies (the counts), not by the unique values themselves
286+
ascending : boolean, default False
287+
Sort in ascending order
288+
bins : integer, optional
289+
Rather than count values, group them into half-open bins,
290+
a convenience for pd.cut, only works with numeric data
291+
292+
Returns
293+
-------
294+
counts : Series
295+
"""
296+
from pandas.core.algorithms import value_counts
297+
return value_counts(self.values, sort=sort, ascending=ascending,
298+
normalize=normalize, bins=bins)
299+
300+
def unique(self):
301+
"""
302+
Return array of unique values in the object. Significantly faster than
303+
numpy.unique. Includes NA values.
304+
305+
Returns
306+
-------
307+
uniques : ndarray
308+
"""
309+
from pandas.core.nanops import unique1d
310+
return unique1d(self.values)
311+
312+
def nunique(self):
313+
"""
314+
Return count of unique elements in the object. Excludes NA values.
315+
316+
Returns
317+
-------
318+
nunique : int
319+
"""
320+
return len(self.value_counts())
321+
272322
date = _field_accessor('date','Returns numpy array of datetime.date. The date part of the Timestamps')
273323
time = _field_accessor('time','Returns numpy array of datetime.time. The time part of the Timestamps')
274324
year = _field_accessor('year', "The year of the datetime")

pandas/core/index.py

-12
Original file line numberDiff line numberDiff line change
@@ -1102,18 +1102,6 @@ def sym_diff(self, other, result_name=None):
11021102
the_diff = sorted(set((self - other) + (other - self)))
11031103
return Index(the_diff, name=result_name)
11041104

1105-
def unique(self):
1106-
"""
1107-
Return array of unique values in the Index. Significantly faster than
1108-
numpy.unique
1109-
1110-
Returns
1111-
-------
1112-
uniques : ndarray
1113-
"""
1114-
from pandas.core.nanops import unique1d
1115-
return unique1d(self.values)
1116-
11171105
def get_loc(self, key):
11181106
"""
11191107
Get integer location for requested label

pandas/core/series.py

-49
Original file line numberDiff line numberDiff line change
@@ -1095,34 +1095,6 @@ def count(self, level=None):
10951095

10961096
return notnull(_values_from_object(self)).sum()
10971097

1098-
def value_counts(self, normalize=False, sort=True, ascending=False,
1099-
bins=None):
1100-
"""
1101-
Returns Series containing counts of unique values. The resulting Series
1102-
will be in descending order so that the first element is the most
1103-
frequently-occurring element. Excludes NA values
1104-
1105-
Parameters
1106-
----------
1107-
normalize : boolean, default False
1108-
If True then the Series returned will contain the relative
1109-
frequencies of the unique values.
1110-
sort : boolean, default True
1111-
Sort by values
1112-
ascending : boolean, default False
1113-
Sort in ascending order
1114-
bins : integer, optional
1115-
Rather than count values, group them into half-open bins,
1116-
a convenience for pd.cut, only works with numeric data
1117-
1118-
Returns
1119-
-------
1120-
counts : Series
1121-
"""
1122-
from pandas.core.algorithms import value_counts
1123-
return value_counts(self.values, sort=sort, ascending=ascending,
1124-
normalize=normalize, bins=bins)
1125-
11261098
def mode(self):
11271099
"""Returns the mode(s) of the dataset.
11281100
@@ -1143,27 +1115,6 @@ def mode(self):
11431115
from pandas.core.algorithms import mode
11441116
return mode(self)
11451117

1146-
def unique(self):
1147-
"""
1148-
Return array of unique values in the Series. Significantly faster than
1149-
numpy.unique
1150-
1151-
Returns
1152-
-------
1153-
uniques : ndarray
1154-
"""
1155-
return nanops.unique1d(self.values)
1156-
1157-
def nunique(self):
1158-
"""
1159-
Return count of unique elements in the Series
1160-
1161-
Returns
1162-
-------
1163-
nunique : int
1164-
"""
1165-
return len(self.value_counts())
1166-
11671118
def drop_duplicates(self, take_last=False, inplace=False):
11681119
"""
11691120
Return Series with duplicate values removed

0 commit comments

Comments
 (0)