Skip to content

Commit 27f4f45

Browse files
lexualqwhelan
authored and committed
dropna added for unique method. Performance improvements for nunique method.
ref pandas-dev#7771
1 parent 7ea39cc commit 27f4f45

File tree

3 files changed

+84
-14
lines changed

3 files changed

+84
-14
lines changed

pandas/core/base.py

+16-13
Original file line numberDiff line numberDiff line change
@@ -432,21 +432,29 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
432432
tz=getattr(self, 'tz', None))
433433
return result
434434

435-
def unique(self):
435+
def unique(self, dropna=False):
436436
"""
437437
Return array of unique values in the object. Significantly faster than
438438
numpy.unique. Includes NA values.
439439
440+
Parameters
441+
----------
442+
dropna : boolean, default False
443+
Don't include NaN in the result.
444+
440445
Returns
441446
-------
442447
uniques : ndarray
443448
"""
444-
from pandas.core.nanops import unique1d
445-
values = self.values
446-
if hasattr(values,'unique'):
447-
return values.unique()
448-
449-
return unique1d(values)
449+
if dropna:
450+
return self.dropna().unique()
451+
else:
452+
if hasattr(self.values, 'unique'):
453+
# Categorical Series not supported by unique1d
454+
return self.values.unique()
455+
else:
456+
from pandas.core.nanops import unique1d
457+
return unique1d(self.values)
450458

451459
def nunique(self, dropna=True):
452460
"""
@@ -463,12 +471,7 @@ def nunique(self, dropna=True):
463471
-------
464472
nunique : int
465473
"""
466-
uniqs = self.unique()
467-
n = len(uniqs)
468-
if dropna and com.isnull(uniqs).any():
469-
n -= 1
470-
return n
471-
474+
return len(self.unique(dropna=dropna))
472475

473476
def factorize(self, sort=False, na_sentinel=-1):
474477
"""

pandas/core/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1478,7 +1478,7 @@ def mode(self):
14781478
fastpath=True)
14791479
return result
14801480

1481-
def unique(self):
1481+
def unique(self, **kwargs):
14821482
"""
14831483
Return the unique values.
14841484

pandas/tseries/index.py

+67
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,73 @@ def snap(self, freq='S'):
794794
# we know it conforms; skip check
795795
return DatetimeIndex(snapped, freq=freq, verify_integrity=False)
796796

797+
def shift(self, n, freq=None):
798+
"""
799+
Specialized shift which produces a DatetimeIndex
800+
801+
Parameters
802+
----------
803+
n : int
804+
Periods to shift by
805+
freq : DateOffset or timedelta-like, optional
806+
807+
Returns
808+
-------
809+
shifted : DatetimeIndex
810+
"""
811+
if freq is not None and freq != self.offset:
812+
if isinstance(freq, compat.string_types):
813+
freq = to_offset(freq)
814+
result = Index.shift(self, n, freq)
815+
result.tz = self.tz
816+
817+
return result
818+
819+
if n == 0:
820+
# immutable so OK
821+
return self
822+
823+
if self.offset is None:
824+
raise ValueError("Cannot shift with no offset")
825+
826+
start = self[0] + n * self.offset
827+
end = self[-1] + n * self.offset
828+
return DatetimeIndex(start=start, end=end, freq=self.offset,
829+
name=self.name, tz=self.tz)
830+
831+
def repeat(self, repeats, axis=None):
832+
"""
833+
Analogous to ndarray.repeat
834+
"""
835+
return DatetimeIndex(self.values.repeat(repeats),
836+
name=self.name)
837+
838+
def take(self, indices, axis=0):
839+
"""
840+
Analogous to ndarray.take
841+
"""
842+
maybe_slice = lib.maybe_indices_to_slice(com._ensure_int64(indices))
843+
if isinstance(maybe_slice, slice):
844+
return self[maybe_slice]
845+
return super(DatetimeIndex, self).take(indices, axis)
846+
847+
def unique(self, dropna=False):
848+
"""
849+
Index.unique with handling for DatetimeIndex metadata
850+
851+
Parameters
852+
----------
853+
dropna : boolean, default False
854+
Don't include NaN in the result.
855+
856+
Returns
857+
-------
858+
result : DatetimeIndex
859+
"""
860+
result = Int64Index.unique(self, dropna=dropna)
861+
return DatetimeIndex._simple_new(result, tz=self.tz,
862+
name=self.name)
863+
797864
def union(self, other):
798865
"""
799866
Specialized union for DatetimeIndex objects. If combine

0 commit comments

Comments
 (0)