pandas-dev · jorisvandenbossche · Sep 1, 2016 · Aug 12, 2016 · jreback · Aug 27, 2016
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -8,6 +8,7 @@
 from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex,
                     MultiIndex, CategoricalIndex, DatetimeIndex,
                     TimedeltaIndex, PeriodIndex, notnull)
+from pandas.types.common import needs_i8_conversion
 from pandas.util.testing import assertRaisesRegexp
 
 import pandas.util.testing as tm
@@ -319,13 +320,21 @@ def test_get_unique_index(self):
             if not ind._can_hold_na:
                 continue
 
-            vals = ind.values[[0] * 5]
-            vals[0] = np.nan
+            if needs_i8_conversion(ind):
+                vals = ind.asi8[[0] * 5]
+                vals[0] = pd.tslib.iNaT
+            else:
+                vals = ind.values[[0] * 5]
+                vals[0] = np.nan
+
             vals_unique = vals[:2]
             idx_nan = ind._shallow_copy(vals)
             idx_unique_nan = ind._shallow_copy(vals_unique)
             self.assertTrue(idx_unique_nan.is_unique)
 
+            self.assertEqual(idx_nan.dtype, ind.dtype)
+            self.assertEqual(idx_unique_nan.dtype, ind.dtype)
+
             for dropna, expected in zip([False, True],
                                         [idx_unique_nan, idx_unique]):
                 for i in [idx_nan, idx_unique_nan]:

diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
@@ -9,15 +9,15 @@
 
 import pandas as pd
 import pandas.compat as compat
-from pandas.types.common import is_object_dtype, is_datetimetz
+from pandas.types.common import (is_object_dtype, is_datetimetz,
+                                 needs_i8_conversion)
 import pandas.util.testing as tm
 from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex,
                     Timedelta)
 from pandas.compat import u, StringIO
 from pandas.compat.numpy import np_array_datetime64_compat
 from pandas.core.base import (FrozenList, FrozenNDArray, PandasDelegate,
                               NoNewAttributesMixin)
-from pandas.types.common import is_datetime64_dtype
 from pandas.tseries.base import DatetimeIndexOpsMixin
 
 
@@ -450,7 +450,6 @@ def test_nanops(self):
 
     def test_value_counts_unique_nunique(self):
         for orig in self.objs:
-
             o = orig.copy()
             klass = type(o)
             values = o._values
@@ -504,9 +503,10 @@ def test_value_counts_unique_nunique(self):
     def test_value_counts_unique_nunique_null(self):
 
         for null_obj in [np.nan, None]:
-            for o in self.objs:
+            for orig in self.objs:
+                o = orig.copy()
                 klass = type(o)
-                values = o.values
+                values = o._values
 
                 if not self._allow_na_ops(o):
                     continue
@@ -522,34 +522,43 @@ def test_value_counts_unique_nunique_null(self):
                         o[0:2] = pd.tslib.iNaT
                         values = o._values
 
-                elif is_datetime64_dtype(o) or isinstance(o, PeriodIndex):
+                elif needs_i8_conversion(o):
                     values[0:2] = pd.tslib.iNaT
+                    values = o._shallow_copy(values)
                 else:
                     values[0:2] = null_obj
                 # check values has the same dtype as the original
+
                 self.assertEqual(values.dtype, o.dtype)
 
                 # create repeated values, 'n'th element is repeated by n+1
                 # times
-                if isinstance(o, PeriodIndex):
-                    # freq must be specified because repeat makes freq
-                    # ambiguous
+                if isinstance(o, (DatetimeIndex, PeriodIndex)):
+                    expected_index = o.copy()
+                    expected_index.name = None
 
-                    # resets name from Index
-                    expected_index = pd.Index(o, name=None)
                     # attach name to klass
-                    o = klass(np.repeat(values, range(1, len(o) + 1)),
-                              freq=o.freq, name='a')
-                elif isinstance(o, Index):
-                    expected_index = pd.Index(values, name=None)
-                    o = klass(
-                        np.repeat(values, range(1, len(o) + 1)), name='a')
+                    o = klass(values.repeat(range(1, len(o) + 1)))
+                    o.name = 'a'
                 else:
-                    expected_index = pd.Index(values, name=None)
-                    idx = np.repeat(o.index.values, range(1, len(o) + 1))
-                    o = klass(
-                        np.repeat(values, range(
-                            1, len(o) + 1)), index=idx, name='a')
+                    if is_datetimetz(o):
+                        expected_index = orig._values._shallow_copy(values)
+                    else:
+                        expected_index = pd.Index(values)
+                    expected_index.name = None
+                    o = o.repeat(range(1, len(o) + 1))
+                    o.name = 'a'
+
+                # check the repeated object keeps the original dtype
+                self.assertEqual(o.dtype, orig.dtype)
+                # check the three leading (repeated) entries are null
+                nanloc = np.zeros(len(o), dtype=np.bool)
+                nanloc[:3] = True
+                if isinstance(o, Index):
+                    self.assert_numpy_array_equal(pd.isnull(o), nanloc)
+                else:
+                    exp = pd.Series(nanloc, o.index, name='a')
+                    self.assert_series_equal(pd.isnull(o), exp)
 
                 expected_s_na = Series(list(range(10, 2, -1)) + [3],
                                        index=expected_index[9:0:-1],
@@ -578,7 +587,9 @@ def test_value_counts_unique_nunique_null(self):
                     self.assertIs(result[0], pd.NaT)
                 else:
                     tm.assert_numpy_array_equal(result[1:], values[2:])
+
                     self.assertTrue(pd.isnull(result[0]))
+                    self.assertEqual(result.dtype, orig.dtype)
 
                 self.assertEqual(o.nunique(), 8)
                 self.assertEqual(o.nunique(dropna=False), 9)
@@ -942,18 +953,14 @@ def test_fillna(self):
         # # GH 11343
         # though Index.fillna and Series.fillna has separate impl,
         # test here to confirm these works as the same
-        def get_fill_value(obj):
-            if isinstance(obj, pd.tseries.base.DatetimeIndexOpsMixin):
-                return obj.asobject.values[0]
-            else:
-                return obj.values[0]
 
-        for o in self.objs:
-            klass = type(o)
+        for orig in self.objs:
+
+            o = orig.copy()
             values = o.values
 
             # values will not be changed
-            result = o.fillna(get_fill_value(o))
+            result = o.fillna(o.astype(object).values[0])
             if isinstance(o, Index):
                 self.assert_index_equal(o, result)
             else:
@@ -962,33 +969,30 @@ def get_fill_value(obj):
             self.assertFalse(o is result)
 
         for null_obj in [np.nan, None]:
-            for o in self.objs:
+            for orig in self.objs:
+                o = orig.copy()
                 klass = type(o)
-                values = o.values.copy()
 
                 if not self._allow_na_ops(o):
                     continue
 
-                # value for filling
-                fill_value = get_fill_value(o)
+                if needs_i8_conversion(o):
 
-                # special assign to the numpy array
-                if o.values.dtype == 'datetime64[ns]' or isinstance(
-                        o, PeriodIndex):
-                    values[0:2] = pd.tslib.iNaT
+                    values = o.astype(object).values
+                    fill_value = values[0]
+                    values[0:2] = pd.NaT
                 else:
+                    values = o.values.copy()
+                    fill_value = o.values[0]
                     values[0:2] = null_obj
 
-                if isinstance(o, PeriodIndex):
-                    # freq must be specified because repeat makes freq
-                    # ambiguous
-                    expected = [fill_value.ordinal] * 2 + list(values[2:])
-                    expected = klass(ordinal=expected, freq=o.freq)
-                    o = klass(ordinal=values, freq=o.freq)
-                else:
-                    expected = [fill_value] * 2 + list(values[2:])
-                    expected = klass(expected)
-                    o = klass(values)
+                expected = [fill_value] * 2 + list(values[2:])
+
+                expected = klass(expected)
+                o = klass(values)
+
+                # check the rebuilt object has the same dtype as the original
+                self.assertEqual(o.dtype, orig.dtype)
 
                 result = o.fillna(fill_value)
                 if isinstance(o, Index):

diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
@@ -362,6 +362,8 @@ def get_duplicates(self):
         values = Index.get_duplicates(self)
         return self._simple_new(values)
 
+    _can_hold_na = True
+
     _na_value = tslib.NaT
     """The expected NA value to use with this index."""
 
@@ -370,11 +372,6 @@ def _isnan(self):
         """ return if each value is nan"""
         return (self.asi8 == tslib.iNaT)
 
-    @cache_readonly
-    def hasnans(self):
-        """ return if I have any nans; enables various perf speedups """
-        return self._isnan.any()
-
     @property
     def asobject(self):
         """

diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
@@ -777,6 +777,15 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
         return Index.get_indexer(self._int64index, target, method,
                                  limit, tolerance)
 
+    def _get_unique_index(self, dropna=False):
+        """
+        Wrap the parent _get_unique_index; if dropna, drop NaT from the result.
+        """
+        res = super(PeriodIndex, self)._get_unique_index(dropna=dropna)
+        if dropna:
+            res = res.dropna()
+        return res
+
     def get_loc(self, key, method=None, tolerance=None):
         """
         Get integer location for requested label

diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py
@@ -555,8 +555,8 @@ def test_nonunique_contains(self):
 
     def test_order(self):
         # with freq
-        idx1 = DatetimeIndex(
-            ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D', name='idx')
+        idx1 = DatetimeIndex(['2011-01-01', '2011-01-02',
+                              '2011-01-03'], freq='D', name='idx')
         idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                               '2011-01-01 11:00'], freq='H',
                              tz='Asia/Tokyo', name='tzidx')
@@ -798,10 +798,27 @@ def test_shift(self):
                                     '2011-01-01 09:00'], name='xxx', tz=tz)
             tm.assert_index_equal(idx.shift(-3, freq='H'), exp)
 
-    def test_na_value(self):
+    def test_nat(self):
         self.assertIs(pd.DatetimeIndex._na_value, pd.NaT)
         self.assertIs(pd.DatetimeIndex([])._na_value, pd.NaT)
 
+        for tz in [None, 'US/Eastern', 'UTC']:
+            idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
+            self.assertTrue(idx._can_hold_na)
+            # no NaT present: mask all False, no positions reported
+            tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
+            self.assertFalse(idx.hasnans)
+            tm.assert_numpy_array_equal(idx._nan_idxs,
+                                        np.array([], dtype=np.int64))
+            # same checks with a NaT entry at position 1
+            idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz)
+            self.assertTrue(idx._can_hold_na)
+            # NaT is flagged in the mask and reported at position 1
+            tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
+            self.assertTrue(idx.hasnans)
+            tm.assert_numpy_array_equal(idx._nan_idxs,
+                                        np.array([1], dtype=np.int64))
+
 
 class TestTimedeltaIndexOps(Ops):
     def setUp(self):
@@ -1645,10 +1662,26 @@ def test_repeat(self):
             tm.assert_index_equal(res, exp)
             self.assertIsNone(res.freq)
 
-    def test_na_value(self):
+    def test_nat(self):
         self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT)
         self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT)
 
+        idx = pd.TimedeltaIndex(['1 days', '2 days'])
+        self.assertTrue(idx._can_hold_na)
+        # no NaT present: mask all False, no positions reported
+        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
+        self.assertFalse(idx.hasnans)
+        tm.assert_numpy_array_equal(idx._nan_idxs,
+                                    np.array([], dtype=np.int64))
+        # same checks with a NaT entry at position 1
+        idx = pd.TimedeltaIndex(['1 days', 'NaT'])
+        self.assertTrue(idx._can_hold_na)
+        # NaT is flagged in the mask and reported at position 1
+        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
+        self.assertTrue(idx.hasnans)
+        tm.assert_numpy_array_equal(idx._nan_idxs,
+                                    np.array([1], dtype=np.int64))
+
 
 class TestPeriodIndexOps(Ops):
     def setUp(self):
@@ -2593,10 +2626,26 @@ def test_repeat(self):
         for res in [index.repeat(3), np.repeat(index, 3)]:
             tm.assert_index_equal(res, exp)
 
-    def test_na_value(self):
+    def test_nat(self):
         self.assertIs(pd.PeriodIndex._na_value, pd.NaT)
         self.assertIs(pd.PeriodIndex([], freq='M')._na_value, pd.NaT)
 
+        idx = pd.PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
+        self.assertTrue(idx._can_hold_na)
+        # no NaT present: mask all False, no positions reported
+        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
+        self.assertFalse(idx.hasnans)
+        tm.assert_numpy_array_equal(idx._nan_idxs,
+                                    np.array([], dtype=np.int64))
+        # same checks with a NaT entry at position 1
+        idx = pd.PeriodIndex(['2011-01-01', 'NaT'], freq='D')
+        self.assertTrue(idx._can_hold_na)
+        # NaT is flagged in the mask and reported at position 1
+        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
+        self.assertTrue(idx.hasnans)
+        tm.assert_numpy_array_equal(idx._nan_idxs,
+                                    np.array([1], dtype=np.int64))
+
 
 if __name__ == '__main__':
     import nose