Skip to content

CLN: Datetimelike._can_hold_na #13983

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 1, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex,
MultiIndex, CategoricalIndex, DatetimeIndex,
TimedeltaIndex, PeriodIndex, notnull)
from pandas.types.common import needs_i8_conversion
from pandas.util.testing import assertRaisesRegexp

import pandas.util.testing as tm
Expand Down Expand Up @@ -319,13 +320,21 @@ def test_get_unique_index(self):
if not ind._can_hold_na:
continue

vals = ind.values[[0] * 5]
vals[0] = np.nan
if needs_i8_conversion(ind):
vals = ind.asi8[[0] * 5]
vals[0] = pd.tslib.iNaT
else:
vals = ind.values[[0] * 5]
vals[0] = np.nan

vals_unique = vals[:2]
idx_nan = ind._shallow_copy(vals)
idx_unique_nan = ind._shallow_copy(vals_unique)
self.assertTrue(idx_unique_nan.is_unique)

self.assertEqual(idx_nan.dtype, ind.dtype)
self.assertEqual(idx_unique_nan.dtype, ind.dtype)

for dropna, expected in zip([False, True],
[idx_unique_nan, idx_unique]):
for i in [idx_nan, idx_unique_nan]:
Expand Down
100 changes: 52 additions & 48 deletions pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@

import pandas as pd
import pandas.compat as compat
from pandas.types.common import is_object_dtype, is_datetimetz
from pandas.types.common import (is_object_dtype, is_datetimetz,
needs_i8_conversion)
import pandas.util.testing as tm
from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex,
Timedelta)
from pandas.compat import u, StringIO
from pandas.compat.numpy import np_array_datetime64_compat
from pandas.core.base import (FrozenList, FrozenNDArray, PandasDelegate,
NoNewAttributesMixin)
from pandas.types.common import is_datetime64_dtype
from pandas.tseries.base import DatetimeIndexOpsMixin


Expand Down Expand Up @@ -450,7 +450,6 @@ def test_nanops(self):

def test_value_counts_unique_nunique(self):
for orig in self.objs:

o = orig.copy()
klass = type(o)
values = o._values
Expand Down Expand Up @@ -504,9 +503,10 @@ def test_value_counts_unique_nunique(self):
def test_value_counts_unique_nunique_null(self):

for null_obj in [np.nan, None]:
for o in self.objs:
for orig in self.objs:
o = orig.copy()
klass = type(o)
values = o.values
values = o._values

if not self._allow_na_ops(o):
continue
Expand All @@ -522,34 +522,43 @@ def test_value_counts_unique_nunique_null(self):
o[0:2] = pd.tslib.iNaT
values = o._values

elif is_datetime64_dtype(o) or isinstance(o, PeriodIndex):
elif needs_i8_conversion(o):
values[0:2] = pd.tslib.iNaT
values = o._shallow_copy(values)
else:
values[0:2] = null_obj
# check values has the same dtype as the original

self.assertEqual(values.dtype, o.dtype)

# create repeated values, 'n'th element is repeated by n+1
# times
if isinstance(o, PeriodIndex):
# freq must be specified because repeat makes freq
# ambiguous
if isinstance(o, (DatetimeIndex, PeriodIndex)):
expected_index = o.copy()
expected_index.name = None

# resets name from Index
expected_index = pd.Index(o, name=None)
# attach name to klass
o = klass(np.repeat(values, range(1, len(o) + 1)),
freq=o.freq, name='a')
elif isinstance(o, Index):
expected_index = pd.Index(values, name=None)
o = klass(
np.repeat(values, range(1, len(o) + 1)), name='a')
o = klass(values.repeat(range(1, len(o) + 1)))
o.name = 'a'
else:
expected_index = pd.Index(values, name=None)
idx = np.repeat(o.index.values, range(1, len(o) + 1))
o = klass(
np.repeat(values, range(
1, len(o) + 1)), index=idx, name='a')
if is_datetimetz(o):
expected_index = orig._values._shallow_copy(values)
else:
expected_index = pd.Index(values)
expected_index.name = None
o = o.repeat(range(1, len(o) + 1))
o.name = 'a'

# check values has the same dtype as the original
self.assertEqual(o.dtype, orig.dtype)
# check values correctly have NaN
nanloc = np.zeros(len(o), dtype=np.bool)
nanloc[:3] = True
if isinstance(o, Index):
self.assert_numpy_array_equal(pd.isnull(o), nanloc)
else:
exp = pd.Series(nanloc, o.index, name='a')
self.assert_series_equal(pd.isnull(o), exp)

expected_s_na = Series(list(range(10, 2, -1)) + [3],
index=expected_index[9:0:-1],
Expand Down Expand Up @@ -578,7 +587,9 @@ def test_value_counts_unique_nunique_null(self):
self.assertIs(result[0], pd.NaT)
else:
tm.assert_numpy_array_equal(result[1:], values[2:])

self.assertTrue(pd.isnull(result[0]))
self.assertEqual(result.dtype, orig.dtype)

self.assertEqual(o.nunique(), 8)
self.assertEqual(o.nunique(dropna=False), 9)
Expand Down Expand Up @@ -942,18 +953,14 @@ def test_fillna(self):
# # GH 11343
# though Index.fillna and Series.fillna has separate impl,
# test here to confirm these works as the same
def get_fill_value(obj):
if isinstance(obj, pd.tseries.base.DatetimeIndexOpsMixin):
return obj.asobject.values[0]
else:
return obj.values[0]

for o in self.objs:
klass = type(o)
for orig in self.objs:

o = orig.copy()
values = o.values

# values will not be changed
result = o.fillna(get_fill_value(o))
result = o.fillna(o.astype(object).values[0])
if isinstance(o, Index):
self.assert_index_equal(o, result)
else:
Expand All @@ -962,33 +969,30 @@ def get_fill_value(obj):
self.assertFalse(o is result)

for null_obj in [np.nan, None]:
for o in self.objs:
for orig in self.objs:
o = orig.copy()
klass = type(o)
values = o.values.copy()

if not self._allow_na_ops(o):
continue

# value for filling
fill_value = get_fill_value(o)
if needs_i8_conversion(o):

# special assign to the numpy array
if o.values.dtype == 'datetime64[ns]' or isinstance(
o, PeriodIndex):
values[0:2] = pd.tslib.iNaT
values = o.astype(object).values
fill_value = values[0]
values[0:2] = pd.NaT
else:
values = o.values.copy()
fill_value = o.values[0]
values[0:2] = null_obj

if isinstance(o, PeriodIndex):
# freq must be specified because repeat makes freq
# ambiguous
expected = [fill_value.ordinal] * 2 + list(values[2:])
expected = klass(ordinal=expected, freq=o.freq)
o = klass(ordinal=values, freq=o.freq)
else:
expected = [fill_value] * 2 + list(values[2:])
expected = klass(expected)
o = klass(values)
expected = [fill_value] * 2 + list(values[2:])

expected = klass(expected)
o = klass(values)

# check values has the same dtype as the original
self.assertEqual(o.dtype, orig.dtype)

result = o.fillna(fill_value)
if isinstance(o, Index):
Expand Down
7 changes: 2 additions & 5 deletions pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,8 @@ def get_duplicates(self):
values = Index.get_duplicates(self)
return self._simple_new(values)

_can_hold_na = True

_na_value = tslib.NaT
"""The expected NA value to use with this index."""

Expand All @@ -370,11 +372,6 @@ def _isnan(self):
""" return if each value is nan"""
return (self.asi8 == tslib.iNaT)

@cache_readonly
def hasnans(self):
""" return if I have any nans; enables various perf speedups """
return self._isnan.any()

@property
def asobject(self):
"""
Expand Down
9 changes: 9 additions & 0 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,15 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
return Index.get_indexer(self._int64index, target, method,
limit, tolerance)

def _get_unique_index(self, dropna=False):
"""
wrap Index._get_unique_index to handle NaT
"""
res = super(PeriodIndex, self)._get_unique_index(dropna=dropna)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be handled in the superclass implementation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is needed to handle NaT. #13979 and #13984 make _get_unique_index itself unnecessary.

if dropna:
res = res.dropna()
return res

def get_loc(self, key, method=None, tolerance=None):
"""
Get integer location for requested label
Expand Down
59 changes: 54 additions & 5 deletions pandas/tseries/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,8 +555,8 @@ def test_nonunique_contains(self):

def test_order(self):
# with freq
idx1 = DatetimeIndex(
['2011-01-01', '2011-01-02', '2011-01-03'], freq='D', name='idx')
idx1 = DatetimeIndex(['2011-01-01', '2011-01-02',
'2011-01-03'], freq='D', name='idx')
idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
'2011-01-01 11:00'], freq='H',
tz='Asia/Tokyo', name='tzidx')
Expand Down Expand Up @@ -798,10 +798,27 @@ def test_shift(self):
'2011-01-01 09:00'], name='xxx', tz=tz)
tm.assert_index_equal(idx.shift(-3, freq='H'), exp)

def test_na_value(self):
def test_nat(self):
self.assertIs(pd.DatetimeIndex._na_value, pd.NaT)
self.assertIs(pd.DatetimeIndex([])._na_value, pd.NaT)

for tz in [None, 'US/Eastern', 'UTC']:
idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
self.assertTrue(idx._can_hold_na)

tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
self.assertFalse(idx.hasnans)
tm.assert_numpy_array_equal(idx._nan_idxs,
np.array([], dtype=np.int64))

idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz)
self.assertTrue(idx._can_hold_na)

tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
self.assertTrue(idx.hasnans)
tm.assert_numpy_array_equal(idx._nan_idxs,
np.array([1], dtype=np.int64))


class TestTimedeltaIndexOps(Ops):
def setUp(self):
Expand Down Expand Up @@ -1645,10 +1662,26 @@ def test_repeat(self):
tm.assert_index_equal(res, exp)
self.assertIsNone(res.freq)

def test_na_value(self):
def test_nat(self):
self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT)
self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT)

idx = pd.TimedeltaIndex(['1 days', '2 days'])
self.assertTrue(idx._can_hold_na)

tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
self.assertFalse(idx.hasnans)
tm.assert_numpy_array_equal(idx._nan_idxs,
np.array([], dtype=np.int64))

idx = pd.TimedeltaIndex(['1 days', 'NaT'])
self.assertTrue(idx._can_hold_na)

tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
self.assertTrue(idx.hasnans)
tm.assert_numpy_array_equal(idx._nan_idxs,
np.array([1], dtype=np.int64))


class TestPeriodIndexOps(Ops):
def setUp(self):
Expand Down Expand Up @@ -2593,10 +2626,26 @@ def test_repeat(self):
for res in [index.repeat(3), np.repeat(index, 3)]:
tm.assert_index_equal(res, exp)

def test_na_value(self):
def test_nat(self):
self.assertIs(pd.PeriodIndex._na_value, pd.NaT)
self.assertIs(pd.PeriodIndex([], freq='M')._na_value, pd.NaT)

idx = pd.PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
self.assertTrue(idx._can_hold_na)

tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
self.assertFalse(idx.hasnans)
tm.assert_numpy_array_equal(idx._nan_idxs,
np.array([], dtype=np.int64))

idx = pd.PeriodIndex(['2011-01-01', 'NaT'], freq='D')
self.assertTrue(idx._can_hold_na)

tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
self.assertTrue(idx.hasnans)
tm.assert_numpy_array_equal(idx._nan_idxs,
np.array([1], dtype=np.int64))


if __name__ == '__main__':
import nose
Expand Down