Skip to content

BUG: DatetimeIndex.asobject raises ValueError when contains NaT #7544

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 24, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/v0.14.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ Bug Fixes
- Bug in ``resample`` where it raised ``ValueError`` when the target contains ``NaT`` (:issue:`7227`)

- Bug in ``Timestamp.tz_localize`` where it reset the ``nanosecond`` info (:issue:`7534`)

- Bug in ``DatetimeIndex.asobject`` where it raised ``ValueError`` when the index contains ``NaT`` (:issue:`7539`)


- Bug in ``Index.astype(float)`` where it would return an ``object`` dtype
Expand Down
26 changes: 26 additions & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,29 @@ def _ops_compat(self, name, op_accessor):
is_quarter_end = _field_accessor('is_quarter_end', "Logical indicating if last day of quarter (defined by frequency)")
is_year_start = _field_accessor('is_year_start', "Logical indicating if first day of year (defined by frequency)")
is_year_end = _field_accessor('is_year_end', "Logical indicating if last day of year (defined by frequency)")

@property
def _box_func(self):
    """
    Callable used to turn one internally stored value into its object form.

    This base version is only a placeholder: accessing it always raises
    NotImplementedError.
    """
    raise NotImplementedError()

def _box_values(self, values):
    """
    Box each element of ``values`` with ``self._box_func`` and return the
    result of ``pandas.lib.map_infer``.
    """
    # Imported at call time, as in the original implementation.
    from pandas.lib import map_infer

    boxer = self._box_func
    return map_infer(values, boxer)

@property
def asobject(self):
    """
    Object-dtype ``Index`` of the boxed elements, keeping this index's name.

    The values in ``self.asi8`` are boxed through ``self._box_values``.
    """
    # Imported at call time, as in the original implementation.
    from pandas.core.index import Index

    boxed = self._box_values(self.asi8)
    return Index(boxed, name=self.name, dtype=object)

def tolist(self):
    """
    Return the elements of ``self.asobject`` as a plain Python list
    (compare ndarray.tolist).
    """
    boxed_index = self.asobject
    return list(boxed_index)

70 changes: 70 additions & 0 deletions pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,44 @@ def test_ops_properties_basic(self):
self.assertEquals(s.day,10)
self.assertRaises(AttributeError, lambda : s.weekday)

def test_asobject_tolist(self):
    """
    Check ``asobject`` and ``tolist`` on a DatetimeIndex for three inputs:
    tz-naive, tz-aware, and one containing ``NaT``.
    """
    def check(index, boxed):
        # The same five assertions apply to every input.
        expected = pd.Index(boxed, dtype=object, name='idx')
        result = index.asobject
        self.assertTrue(isinstance(result, Index))
        self.assertEqual(result.dtype, object)
        self.assertTrue(result.equals(expected))
        self.assertEqual(result.name, expected.name)
        self.assertEqual(index.tolist(), boxed)

    # tz-naive month-end range
    naive = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx')
    check(naive, [pd.Timestamp('2013-01-31'), pd.Timestamp('2013-02-28'),
                  pd.Timestamp('2013-03-31'), pd.Timestamp('2013-04-30')])

    # tz-aware month-end range
    aware = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx',
                          tz='Asia/Tokyo')
    check(aware, [pd.Timestamp('2013-01-31', tz='Asia/Tokyo'),
                  pd.Timestamp('2013-02-28', tz='Asia/Tokyo'),
                  pd.Timestamp('2013-03-31', tz='Asia/Tokyo'),
                  pd.Timestamp('2013-04-30', tz='Asia/Tokyo')])

    # index with a missing value in the third slot
    with_nat = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2),
                              pd.NaT, datetime(2013, 1, 4)], name='idx')
    check(with_nat, [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'),
                     pd.NaT, pd.Timestamp('2013-01-04')])


class TestPeriodIndexOps(Ops):
_allowed = '_allow_period_index_ops'

Expand All @@ -528,6 +566,38 @@ def test_ops_properties(self):
self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter'])
self.check_ops_properties(['qyear'], lambda x: isinstance(x,PeriodIndex))

def test_asobject_tolist(self):
    """
    Check ``asobject`` and ``tolist`` on a PeriodIndex, first for a plain
    monthly range and then for a daily index whose third element is ``NaT``.
    """
    idx = pd.period_range(start='2013-01-01', periods=4, freq='M', name='idx')
    expected_list = [pd.Period('2013-01-31', freq='M'), pd.Period('2013-02-28', freq='M'),
                     pd.Period('2013-03-31', freq='M'), pd.Period('2013-04-30', freq='M')]
    expected = pd.Index(expected_list, dtype=object, name='idx')
    result = idx.asobject
    self.assertTrue(isinstance(result, Index))
    self.assertEqual(result.dtype, object)
    self.assertTrue(result.equals(expected))
    self.assertEqual(result.name, expected.name)
    self.assertEqual(idx.tolist(), expected_list)

    idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', '2013-01-04'], freq='D', name='idx')
    expected_list = [pd.Period('2013-01-01', freq='D'), pd.Period('2013-01-02', freq='D'),
                     pd.Period('NaT', freq='D'), pd.Period('2013-01-04', freq='D')]
    expected = pd.Index(expected_list, dtype=object, name='idx')
    result = idx.asobject
    self.assertTrue(isinstance(result, Index))
    self.assertEqual(result.dtype, object)
    # The NaT slot (position 2) is verified through its ordinal and freq
    # instead of by equality -- presumably a NaT Period does not compare
    # equal to another one; TODO confirm.
    # These must be assertEqual: assertTrue(a, b) treats ``b`` as the failure
    # message and only checks that ``a`` is truthy, so it compares nothing.
    for i in [0, 1, 3]:
        self.assertEqual(result[i], expected[i])
    self.assertEqual(result[2].ordinal, pd.tslib.iNaT)
    self.assertEqual(result[2].freq, 'D')
    self.assertEqual(result.name, expected.name)

    result_list = idx.tolist()
    for i in [0, 1, 3]:
        self.assertEqual(result_list[i], expected_list[i])
    self.assertEqual(result_list[2].ordinal, pd.tslib.iNaT)
    self.assertEqual(result_list[2].freq, 'D')


if __name__ == '__main__':
import nose

Expand Down
28 changes: 14 additions & 14 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def test_append(self):
tm.assert_series_equal(result, self.frame['A'])

def test_append_index(self):
tm._skip_if_no_pytz()

idx1 = Index([1.1, 1.2, 1.3])
idx2 = pd.date_range('2011-01-01', freq='D', periods=3, tz='Asia/Tokyo')
Expand All @@ -81,17 +82,18 @@ def test_append_index(self):
midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3])

result = idx1.append(midx_lv2)
expected = Index([1.1, 1.2, 1.3,
(1.1, datetime.datetime(2010, 12, 31, 15, 0)),
(1.2, datetime.datetime(2011, 1, 1, 15, 0)),
(1.3, datetime.datetime(2011, 1, 2, 15, 0))])

# GH 7112
import pytz
tz = pytz.timezone('Asia/Tokyo')
expected_tuples = [(1.1, datetime.datetime(2011, 1, 1, tzinfo=tz)),
(1.2, datetime.datetime(2011, 1, 2, tzinfo=tz)),
(1.3, datetime.datetime(2011, 1, 3, tzinfo=tz))]
expected = Index([1.1, 1.2, 1.3] + expected_tuples)
self.assert_(result.equals(expected))

result = midx_lv2.append(idx1)
expected = Index([(1.1, datetime.datetime(2010, 12, 31, 15, 0)),
(1.2, datetime.datetime(2011, 1, 1, 15, 0)),
(1.3, datetime.datetime(2011, 1, 2, 15, 0)),
1.1, 1.2, 1.3])
expected = Index(expected_tuples + [1.1, 1.2, 1.3])
self.assert_(result.equals(expected))

result = midx_lv2.append(midx_lv2)
Expand All @@ -103,12 +105,10 @@ def test_append_index(self):

result = midx_lv3.append(midx_lv2)
expected = Index._simple_new(
np.array([(1.1, datetime.datetime(2010, 12, 31, 15, 0), 'A'),
(1.2, datetime.datetime(2011, 1, 1, 15, 0), 'B'),
(1.3, datetime.datetime(2011, 1, 2, 15, 0), 'C'),
(1.1, datetime.datetime(2010, 12, 31, 15, 0)),
(1.2, datetime.datetime(2011, 1, 1, 15, 0)),
(1.3, datetime.datetime(2011, 1, 2, 15, 0))]), None)
np.array([(1.1, datetime.datetime(2011, 1, 1, tzinfo=tz), 'A'),
(1.2, datetime.datetime(2011, 1, 2, tzinfo=tz), 'B'),
(1.3, datetime.datetime(2011, 1, 3, tzinfo=tz), 'C')]
+ expected_tuples), None)
self.assert_(result.equals(expected))

def test_dataframe_constructor(self):
Expand Down
41 changes: 10 additions & 31 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@

import numpy as np

from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE,
is_list_like,_values_from_object, _maybe_box,
notnull, ABCSeries)
from pandas.core.index import Index, Int64Index, _Identity, Float64Index
from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE,
_values_from_object, _maybe_box,
ABCSeries)
from pandas.core.index import Index, Int64Index, Float64Index
import pandas.compat as compat
from pandas.compat import u
from pandas.tseries.frequencies import (
infer_freq, to_offset, get_period_alias,
Resolution, get_reso_string, get_offset)
Resolution, get_reso_string)
from pandas.core.base import DatetimeIndexOpsMixin
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
from pandas.tseries.tools import parse_time_string, normalize_date
Expand All @@ -29,7 +29,6 @@
import pandas.algos as _algos
import pandas.index as _index

from pandas.tslib import isleapyear

def _utc():
import pytz
Expand Down Expand Up @@ -452,8 +451,9 @@ def _generate(cls, start, end, periods, name, offset,

return index

def _box_values(self, values):
return lib.map_infer(values, lib.Timestamp)
@property
def _box_func(self):
    """
    Callable that builds a ``Timestamp`` from one value, passing along this
    index's ``offset`` and ``tz``.
    """
    def box(x):
        # offset/tz are read from the index when a value is boxed,
        # not when this property is accessed.
        return Timestamp(x, offset=self.offset, tz=self.tz)

    return box

def _local_timestamps(self):
utc = _utc()
Expand Down Expand Up @@ -673,7 +673,7 @@ def _format_with_header(self, header, **kwargs):

def _format_native_types(self, na_rep=u('NaT'),
date_format=None, **kwargs):
data = self._get_object_index()
data = self.asobject
from pandas.core.format import Datetime64Formatter
return Datetime64Formatter(values=data,
nat_rep=na_rep,
Expand Down Expand Up @@ -778,27 +778,6 @@ def _to_embed(self, keep_tz=False):
return self.asobject.values
return self.values

@property
def asobject(self):
"""
Convert to Index of datetime objects
"""
if isnull(self).any():
msg = 'DatetimeIndex with NaT cannot be converted to object'
raise ValueError(msg)
return self._get_object_index()

def tolist(self):
"""
See ndarray.tolist
"""
return list(self.asobject)

def _get_object_index(self):
boxfunc = lambda x: Timestamp(x, offset=self.offset, tz=self.tz)
boxed_values = lib.map_infer(self.asi8, boxfunc)
return Index(boxed_values, dtype=object, name=self.name)

def to_pydatetime(self):
"""
Return DatetimeIndex as object ndarray of datetime.datetime objects
Expand Down Expand Up @@ -1515,7 +1494,7 @@ def normalize(self):
tz=self.tz)

def __iter__(self):
return iter(self._get_object_index())
return iter(self.asobject)

def searchsorted(self, key, side='left'):
if isinstance(key, np.ndarray):
Expand Down
16 changes: 3 additions & 13 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,9 +723,9 @@ def __contains__(self, key):
return False
return key.ordinal in self._engine

def _box_values(self, values):
f = lambda x: Period(ordinal=x, freq=self.freq)
return lib.map_infer(values, f)
@property
def _box_func(self):
    """
    Callable that builds a ``Period`` from one ordinal, using this index's
    ``freq``.
    """
    def box(x):
        # freq is read from the index when a value is boxed,
        # not when this property is accessed.
        return Period(ordinal=x, freq=self.freq)

    return box

def asof_locs(self, where, mask):
"""
Expand All @@ -747,10 +747,6 @@ def asof_locs(self, where, mask):

return result

@property
def asobject(self):
return Index(self._box_values(self.values), name=self.name, dtype=object)

def _array_values(self):
return self.asobject

Expand Down Expand Up @@ -854,12 +850,6 @@ def equals(self, other):

return np.array_equal(self.asi8, other.asi8)

def tolist(self):
"""
Return a list of Period objects
"""
return self._get_object_array().tolist()

def to_timestamp(self, freq=None, how='start'):
"""
Cast to DatetimeIndex
Expand Down
12 changes: 0 additions & 12 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1448,18 +1448,6 @@ def test_to_timestamp_period_nat(self):
self.assertTrue(result2.equals(index))
self.assertEqual(result2.name, 'idx')

def test_asobject_period_nat(self):
index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx')

result = index.asobject
self.assertTrue(isinstance(result, Index))
self.assertEqual(result.dtype, object)
self.assertTrue(isinstance(result[0], Period))
self.assertEqual(result[0].ordinal, tslib.iNaT)
self.assertEqual(result[1], Period('2011-01', freq='M'))
self.assertEqual(result[2], Period('2011-02', freq='M'))
self.assertEqual(result.name, 'idx')

def test_as_frame_columns(self):
rng = period_range('1/1/2000', periods=5)
df = DataFrame(randn(10, 5), columns=rng)
Expand Down
12 changes: 1 addition & 11 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# pylint: disable-msg=E1101,W0612
from datetime import datetime, time, timedelta, date
from datetime import datetime, time, timedelta
import sys
import operator

Expand Down Expand Up @@ -2363,16 +2363,6 @@ def test_order(self):
self.assertTrue(ordered[::-1].is_monotonic)
self.assert_numpy_array_equal(dexer, [0, 2, 1])

def test_asobject(self):
idx = date_range(start='2013-01-01', periods=4, freq='M', name='idx')
expected = Index([Timestamp('2013-01-31'), Timestamp('2013-02-28'),
Timestamp('2013-03-31'), Timestamp('2013-04-30')],
dtype=object, name='idx')

result = idx.asobject
self.assertTrue(result.equals(expected))
self.assertEqual(result.name, expected.name)

def test_insert(self):
idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'], name='idx')

Expand Down