Skip to content

Commit 5435247

Browse files
committed
CLN: Datetimelike._can_hold_na
1 parent 8fdfa51 commit 5435247

File tree

5 files changed

+128
-60
lines changed

5 files changed

+128
-60
lines changed

pandas/tests/indexes/common.py

+11 -2
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex,
99
MultiIndex, CategoricalIndex, DatetimeIndex,
1010
TimedeltaIndex, PeriodIndex, notnull)
11+
from pandas.types.common import needs_i8_conversion
1112
from pandas.util.testing import assertRaisesRegexp
1213

1314
import pandas.util.testing as tm
@@ -319,13 +320,21 @@ def test_get_unique_index(self):
319320
if not ind._can_hold_na:
320321
continue
321322

322-
vals = ind.values[[0] * 5]
323-
vals[0] = np.nan
323+
if needs_i8_conversion(ind):
324+
vals = ind.asi8[[0] * 5]
325+
vals[0] = pd.tslib.iNaT
326+
else:
327+
vals = ind.values[[0] * 5]
328+
vals[0] = np.nan
329+
324330
vals_unique = vals[:2]
325331
idx_nan = ind._shallow_copy(vals)
326332
idx_unique_nan = ind._shallow_copy(vals_unique)
327333
self.assertTrue(idx_unique_nan.is_unique)
328334

335+
self.assertEqual(idx_nan.dtype, ind.dtype)
336+
self.assertEqual(idx_unique_nan.dtype, ind.dtype)
337+
329338
for dropna, expected in zip([False, True],
330339
[idx_unique_nan, idx_unique]):
331340
for i in [idx_nan, idx_unique_nan]:

pandas/tests/test_base.py

+52 -48
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,15 @@
99

1010
import pandas as pd
1111
import pandas.compat as compat
12-
from pandas.types.common import is_object_dtype, is_datetimetz
12+
from pandas.types.common import (is_object_dtype, is_datetimetz,
13+
needs_i8_conversion)
1314
import pandas.util.testing as tm
1415
from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex,
1516
Timedelta)
1617
from pandas.compat import u, StringIO
1718
from pandas.compat.numpy import np_array_datetime64_compat
1819
from pandas.core.base import (FrozenList, FrozenNDArray, PandasDelegate,
1920
NoNewAttributesMixin)
20-
from pandas.types.common import is_datetime64_dtype
2121
from pandas.tseries.base import DatetimeIndexOpsMixin
2222

2323

@@ -450,7 +450,6 @@ def test_nanops(self):
450450

451451
def test_value_counts_unique_nunique(self):
452452
for orig in self.objs:
453-
454453
o = orig.copy()
455454
klass = type(o)
456455
values = o._values
@@ -504,9 +503,10 @@ def test_value_counts_unique_nunique(self):
504503
def test_value_counts_unique_nunique_null(self):
505504

506505
for null_obj in [np.nan, None]:
507-
for o in self.objs:
506+
for orig in self.objs:
507+
o = orig.copy()
508508
klass = type(o)
509-
values = o.values
509+
values = o._values
510510

511511
if not self._allow_na_ops(o):
512512
continue
@@ -522,34 +522,43 @@ def test_value_counts_unique_nunique_null(self):
522522
o[0:2] = pd.tslib.iNaT
523523
values = o._values
524524

525-
elif is_datetime64_dtype(o) or isinstance(o, PeriodIndex):
525+
elif needs_i8_conversion(o):
526526
values[0:2] = pd.tslib.iNaT
527+
values = o._shallow_copy(values)
527528
else:
528529
values[0:2] = null_obj
529530
# check values has the same dtype as the original
531+
530532
self.assertEqual(values.dtype, o.dtype)
531533

532534
# create repeated values, 'n'th element is repeated by n+1
533535
# times
534-
if isinstance(o, PeriodIndex):
535-
# freq must be specified because repeat makes freq
536-
# ambiguous
536+
if isinstance(o, (DatetimeIndex, PeriodIndex)):
537+
expected_index = o.copy()
538+
expected_index.name = None
537539

538-
# resets name from Index
539-
expected_index = pd.Index(o, name=None)
540540
# attach name to klass
541-
o = klass(np.repeat(values, range(1, len(o) + 1)),
542-
freq=o.freq, name='a')
543-
elif isinstance(o, Index):
544-
expected_index = pd.Index(values, name=None)
545-
o = klass(
546-
np.repeat(values, range(1, len(o) + 1)), name='a')
541+
o = klass(values.repeat(range(1, len(o) + 1)))
542+
o.name = 'a'
547543
else:
548-
expected_index = pd.Index(values, name=None)
549-
idx = np.repeat(o.index.values, range(1, len(o) + 1))
550-
o = klass(
551-
np.repeat(values, range(
552-
1, len(o) + 1)), index=idx, name='a')
544+
if is_datetimetz(o):
545+
expected_index = orig._values._shallow_copy(values)
546+
else:
547+
expected_index = pd.Index(values)
548+
expected_index.name = None
549+
o = o.repeat(range(1, len(o) + 1))
550+
o.name = 'a'
551+
552+
# check values has the same dtype as the original
553+
self.assertEqual(o.dtype, orig.dtype)
554+
# check values correctly have NaN
555+
nanloc = np.zeros(len(o), dtype=np.bool)
556+
nanloc[:3] = True
557+
if isinstance(o, Index):
558+
self.assert_numpy_array_equal(pd.isnull(o), nanloc)
559+
else:
560+
exp = pd.Series(nanloc, o.index, name='a')
561+
self.assert_series_equal(pd.isnull(o), exp)
553562

554563
expected_s_na = Series(list(range(10, 2, -1)) + [3],
555564
index=expected_index[9:0:-1],
@@ -578,7 +587,9 @@ def test_value_counts_unique_nunique_null(self):
578587
self.assertIs(result[0], pd.NaT)
579588
else:
580589
tm.assert_numpy_array_equal(result[1:], values[2:])
590+
581591
self.assertTrue(pd.isnull(result[0]))
592+
self.assertEqual(result.dtype, orig.dtype)
582593

583594
self.assertEqual(o.nunique(), 8)
584595
self.assertEqual(o.nunique(dropna=False), 9)
@@ -942,18 +953,14 @@ def test_fillna(self):
942953
# # GH 11343
943954
# though Index.fillna and Series.fillna has separate impl,
944955
# test here to confirm these works as the same
945-
def get_fill_value(obj):
946-
if isinstance(obj, pd.tseries.base.DatetimeIndexOpsMixin):
947-
return obj.asobject.values[0]
948-
else:
949-
return obj.values[0]
950956

951-
for o in self.objs:
952-
klass = type(o)
957+
for orig in self.objs:
958+
959+
o = orig.copy()
953960
values = o.values
954961

955962
# values will not be changed
956-
result = o.fillna(get_fill_value(o))
963+
result = o.fillna(o.astype(object).values[0])
957964
if isinstance(o, Index):
958965
self.assert_index_equal(o, result)
959966
else:
@@ -962,33 +969,30 @@ def get_fill_value(obj):
962969
self.assertFalse(o is result)
963970

964971
for null_obj in [np.nan, None]:
965-
for o in self.objs:
972+
for orig in self.objs:
973+
o = orig.copy()
966974
klass = type(o)
967-
values = o.values.copy()
968975

969976
if not self._allow_na_ops(o):
970977
continue
971978

972-
# value for filling
973-
fill_value = get_fill_value(o)
979+
if needs_i8_conversion(o):
974980

975-
# special assign to the numpy array
976-
if o.values.dtype == 'datetime64[ns]' or isinstance(
977-
o, PeriodIndex):
978-
values[0:2] = pd.tslib.iNaT
981+
values = o.astype(object).values
982+
fill_value = values[0]
983+
values[0:2] = pd.NaT
979984
else:
985+
values = o.values.copy()
986+
fill_value = o.values[0]
980987
values[0:2] = null_obj
981988

982-
if isinstance(o, PeriodIndex):
983-
# freq must be specified because repeat makes freq
984-
# ambiguous
985-
expected = [fill_value.ordinal] * 2 + list(values[2:])
986-
expected = klass(ordinal=expected, freq=o.freq)
987-
o = klass(ordinal=values, freq=o.freq)
988-
else:
989-
expected = [fill_value] * 2 + list(values[2:])
990-
expected = klass(expected)
991-
o = klass(values)
989+
expected = [fill_value] * 2 + list(values[2:])
990+
991+
expected = klass(expected)
992+
o = klass(values)
993+
994+
# check values has the same dtype as the original
995+
self.assertEqual(o.dtype, orig.dtype)
992996

993997
result = o.fillna(fill_value)
994998
if isinstance(o, Index):

pandas/tseries/base.py

+2 -5
Original file line number | Diff line number | Diff line change
@@ -362,6 +362,8 @@ def get_duplicates(self):
362362
values = Index.get_duplicates(self)
363363
return self._simple_new(values)
364364

365+
_can_hold_na = True
366+
365367
_na_value = tslib.NaT
366368
"""The expected NA value to use with this index."""
367369

@@ -370,11 +372,6 @@ def _isnan(self):
370372
""" return if each value is nan"""
371373
return (self.asi8 == tslib.iNaT)
372374

373-
@cache_readonly
374-
def hasnans(self):
375-
""" return if I have any nans; enables various perf speedups """
376-
return self._isnan.any()
377-
378375
@property
379376
def asobject(self):
380377
"""

pandas/tseries/period.py

+9
Original file line number | Diff line number | Diff line change
@@ -777,6 +777,15 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
777777
return Index.get_indexer(self._int64index, target, method,
778778
limit, tolerance)
779779

780+
def _get_unique_index(self, dropna=False):
781+
"""
782+
wrap Index._get_unique_index to handle NaT
783+
"""
784+
res = super(PeriodIndex, self)._get_unique_index(dropna=dropna)
785+
if dropna:
786+
res = res.dropna()
787+
return res
788+
780789
def get_loc(self, key, method=None, tolerance=None):
781790
"""
782791
Get integer location for requested label

pandas/tseries/tests/test_base.py

+54 -5
Original file line number | Diff line number | Diff line change
@@ -555,8 +555,8 @@ def test_nonunique_contains(self):
555555

556556
def test_order(self):
557557
# with freq
558-
idx1 = DatetimeIndex(
559-
['2011-01-01', '2011-01-02', '2011-01-03'], freq='D', name='idx')
558+
idx1 = DatetimeIndex(['2011-01-01', '2011-01-02',
559+
'2011-01-03'], freq='D', name='idx')
560560
idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
561561
'2011-01-01 11:00'], freq='H',
562562
tz='Asia/Tokyo', name='tzidx')
@@ -798,10 +798,27 @@ def test_shift(self):
798798
'2011-01-01 09:00'], name='xxx', tz=tz)
799799
tm.assert_index_equal(idx.shift(-3, freq='H'), exp)
800800

801-
def test_na_value(self):
801+
def test_nat(self):
802802
self.assertIs(pd.DatetimeIndex._na_value, pd.NaT)
803803
self.assertIs(pd.DatetimeIndex([])._na_value, pd.NaT)
804804

805+
for tz in [None, 'US/Eastern', 'UTC']:
806+
idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
807+
self.assertTrue(idx._can_hold_na)
808+
809+
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
810+
self.assertFalse(idx.hasnans)
811+
tm.assert_numpy_array_equal(idx._nan_idxs,
812+
np.array([], dtype=np.int64))
813+
814+
idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz)
815+
self.assertTrue(idx._can_hold_na)
816+
817+
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
818+
self.assertTrue(idx.hasnans)
819+
tm.assert_numpy_array_equal(idx._nan_idxs,
820+
np.array([1], dtype=np.int64))
821+
805822

806823
class TestTimedeltaIndexOps(Ops):
807824
def setUp(self):
@@ -1645,10 +1662,26 @@ def test_repeat(self):
16451662
tm.assert_index_equal(res, exp)
16461663
self.assertIsNone(res.freq)
16471664

1648-
def test_na_value(self):
1665+
def test_nat(self):
16491666
self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT)
16501667
self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT)
16511668

1669+
idx = pd.TimedeltaIndex(['1 days', '2 days'])
1670+
self.assertTrue(idx._can_hold_na)
1671+
1672+
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
1673+
self.assertFalse(idx.hasnans)
1674+
tm.assert_numpy_array_equal(idx._nan_idxs,
1675+
np.array([], dtype=np.int64))
1676+
1677+
idx = pd.TimedeltaIndex(['1 days', 'NaT'])
1678+
self.assertTrue(idx._can_hold_na)
1679+
1680+
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
1681+
self.assertTrue(idx.hasnans)
1682+
tm.assert_numpy_array_equal(idx._nan_idxs,
1683+
np.array([1], dtype=np.int64))
1684+
16521685

16531686
class TestPeriodIndexOps(Ops):
16541687
def setUp(self):
@@ -2593,10 +2626,26 @@ def test_repeat(self):
25932626
for res in [index.repeat(3), np.repeat(index, 3)]:
25942627
tm.assert_index_equal(res, exp)
25952628

2596-
def test_na_value(self):
2629+
def test_nat(self):
25972630
self.assertIs(pd.PeriodIndex._na_value, pd.NaT)
25982631
self.assertIs(pd.PeriodIndex([], freq='M')._na_value, pd.NaT)
25992632

2633+
idx = pd.PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
2634+
self.assertTrue(idx._can_hold_na)
2635+
2636+
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
2637+
self.assertFalse(idx.hasnans)
2638+
tm.assert_numpy_array_equal(idx._nan_idxs,
2639+
np.array([], dtype=np.int64))
2640+
2641+
idx = pd.PeriodIndex(['2011-01-01', 'NaT'], freq='D')
2642+
self.assertTrue(idx._can_hold_na)
2643+
2644+
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
2645+
self.assertTrue(idx.hasnans)
2646+
tm.assert_numpy_array_equal(idx._nan_idxs,
2647+
np.array([1], dtype=np.int64))
2648+
26002649

26012650
if __name__ == '__main__':
26022651
import nose

0 commit comments

Comments
 (0)