Skip to content

Commit ff124f9

Browse files
committed
BUG: fix isnull behavior when passed PeriodIndex with NaT (GH 9129)
PERF: use unique and isnull in nunique instead of value_counts.
1 parent ab20769 commit ff124f9

File tree

4 files changed

+24
-2
lines changed

4 files changed

+24
-2
lines changed

doc/source/whatsnew/v0.16.0.txt

+5
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,12 @@ Performance
4545

4646
.. _whatsnew_0160.performance:
4747

48+
4849
- Fixed a severe performance regression for ``.loc`` indexing with an array or list (:issue:`9126`).
4950

51+
- Improved the speed of ``nunique`` by calling ``unique`` instead of ``value_counts`` (:issue:`9129`, :issue:`7771`)
52+
53+
5054
Bug Fixes
5155
~~~~~~~~~
5256

@@ -114,3 +118,4 @@ Bug Fixes
114118

115119
- DataFrame now properly supports simultaneous ``copy`` and ``dtype`` arguments in constructor (:issue:`9099`)
116120
- Bug in ``read_csv`` when using ``skiprows`` on a file with CR line endings with the C engine (:issue:`9079`)
121+
- ``isnull`` now detects ``NaT`` in ``PeriodIndex`` (:issue:`9129`)

pandas/core/base.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,12 @@ def nunique(self, dropna=True):
441441
-------
442442
nunique : int
443443
"""
444-
return len(self.value_counts(dropna=dropna))
444+
uniqs = self.unique()
445+
n = len(uniqs)
446+
if dropna and com.isnull(uniqs).any():
447+
n -= 1
448+
return n
449+
445450

446451
def factorize(self, sort=False, na_sentinel=-1):
447452
"""

pandas/core/common.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ def _isnull_ndarraylike(obj):
302302
vec = lib.isnullobj(values.ravel())
303303
result[...] = vec.reshape(shape)
304304

305-
elif dtype in _DATELIKE_DTYPES:
305+
elif is_datetimelike(obj):
306306
# this is the NaT pattern
307307
result = values.view('i8') == tslib.iNaT
308308
else:
@@ -2366,6 +2366,9 @@ def is_datetime_arraylike(arr):
23662366
return arr.dtype == object and lib.infer_dtype(arr) == 'datetime'
23672367
return getattr(arr, 'inferred_type', None) == 'datetime'
23682368

2369+
def is_datetimelike(arr):
2370+
return arr.dtype in _DATELIKE_DTYPES or isinstance(arr, ABCPeriodIndex)
2371+
23692372
def _coerce_to_dtype(dtype):
23702373
""" coerce a string / np.dtype to a dtype """
23712374
if is_categorical_dtype(dtype):

pandas/tests/test_common.py

+9
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,15 @@ def test_isnull_datetime():
157157
assert(mask[0])
158158
assert(not mask[1:].any())
159159

160+
# GH 9129
161+
pidx = idx.to_period(freq='M')
162+
mask = isnull(pidx)
163+
assert(mask[0])
164+
assert(not mask[1:].any())
165+
166+
mask = isnull(pidx[1:])
167+
assert(not mask.any())
168+
160169

161170
class TestIsNull(tm.TestCase):
162171
def test_0d_array(self):

0 commit comments

Comments
 (0)