diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index c41bc13b18606..91cb18d2accd1 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -248,7 +248,7 @@ Bug Fixes - BUG in ``resample`` raises ``ValueError`` when target contains ``NaT`` (:issue:`7227`) - Bug in ``Timestamp.tz_localize`` resets ``nanosecond`` info (:issue:`7534`) - +- Bug in ``DatetimeIndex.asobject`` raises ``ValueError`` when it contains ``NaT`` (:issue:`7539`) - Bug in ``Index.astype(float)`` where it would return an ``object`` dtype diff --git a/pandas/core/base.py b/pandas/core/base.py index b43883885e962..cc676b9682277 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -376,3 +376,32 @@ def _ops_compat(self, name, op_accessor): is_quarter_end = _field_accessor('is_quarter_end', "Logical indicating if last day of quarter (defined by frequency)") is_year_start = _field_accessor('is_year_start', "Logical indicating if first day of year (defined by frequency)") is_year_end = _field_accessor('is_year_end', "Logical indicating if last day of year (defined by frequency)") + + @property + def _box_func(self): + """ + box function to get object from internal representation; must be overridden by subclasses + """ + raise NotImplementedError + + def _box_values(self, values): + """ + apply box func to passed values + """ + import pandas.lib as lib + return lib.map_infer(values, self._box_func) + + @property + def asobject(self): + """ + return object Index which contains boxed values + """ + from pandas.core.index import Index + return Index(self._box_values(self.asi8), name=self.name, dtype=object) + + def tolist(self): + """ + See ndarray.tolist + """ + return list(self.asobject) + diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 6c8dd3478835f..291b10c70c83c 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -515,6 +515,44 @@ def test_ops_properties_basic(self): self.assertEquals(s.day,10) self.assertRaises(AttributeError, lambda : s.weekday) + def test_asobject_tolist(self): + idx = 
pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx') + expected_list = [pd.Timestamp('2013-01-31'), pd.Timestamp('2013-02-28'), + pd.Timestamp('2013-03-31'), pd.Timestamp('2013-04-30')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx', tz='Asia/Tokyo') + expected_list = [pd.Timestamp('2013-01-31', tz='Asia/Tokyo'), + pd.Timestamp('2013-02-28', tz='Asia/Tokyo'), + pd.Timestamp('2013-03-31', tz='Asia/Tokyo'), + pd.Timestamp('2013-04-30', tz='Asia/Tokyo')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), + pd.NaT, datetime(2013, 1, 4)], name='idx') + expected_list = [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'), + pd.NaT, pd.Timestamp('2013-01-04')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + class TestPeriodIndexOps(Ops): _allowed = '_allow_period_index_ops' @@ -528,6 +566,38 @@ def test_ops_properties(self): self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter']) self.check_ops_properties(['qyear'], lambda x: isinstance(x,PeriodIndex)) + def 
test_asobject_tolist(self): + idx = pd.period_range(start='2013-01-01', periods=4, freq='M', name='idx') + expected_list = [pd.Period('2013-01-31', freq='M'), pd.Period('2013-02-28', freq='M'), + pd.Period('2013-03-31', freq='M'), pd.Period('2013-04-30', freq='M')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', '2013-01-04'], freq='D', name='idx') + expected_list = [pd.Period('2013-01-01', freq='D'), pd.Period('2013-01-02', freq='D'), + pd.Period('NaT', freq='D'), pd.Period('2013-01-04', freq='D')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + for i in [0, 1, 3]: + self.assertEqual(result[i], expected[i]) + self.assertEqual(result[2].ordinal, pd.tslib.iNaT) + self.assertEqual(result[2].freq, 'D') + self.assertEqual(result.name, expected.name) + + result_list = idx.tolist() + for i in [0, 1, 3]: + self.assertEqual(result_list[i], expected_list[i]) + self.assertEqual(result_list[2].ordinal, pd.tslib.iNaT) + self.assertEqual(result_list[2].freq, 'D') + + if __name__ == '__main__': import nose diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 8bc6f1a21b68a..c0ca5451ef1d2 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -72,6 +72,7 @@ def test_append(self): tm.assert_series_equal(result, self.frame['A']) def test_append_index(self): + tm._skip_if_no_pytz() idx1 = Index([1.1, 1.2, 1.3]) idx2 = pd.date_range('2011-01-01', freq='D', periods=3, tz='Asia/Tokyo') @@ -81,17 +82,18 @@ def test_append_index(self): midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3]) 
result = idx1.append(midx_lv2) - expected = Index([1.1, 1.2, 1.3, - (1.1, datetime.datetime(2010, 12, 31, 15, 0)), - (1.2, datetime.datetime(2011, 1, 1, 15, 0)), - (1.3, datetime.datetime(2011, 1, 2, 15, 0))]) + + # GH 7112; build aware datetimes with tz.localize (pytz zones are unreliable when passed as tzinfo=) + import pytz + tz = pytz.timezone('Asia/Tokyo') + expected_tuples = [(1.1, tz.localize(datetime.datetime(2011, 1, 1))), + (1.2, tz.localize(datetime.datetime(2011, 1, 2))), + (1.3, tz.localize(datetime.datetime(2011, 1, 3)))] + expected = Index([1.1, 1.2, 1.3] + expected_tuples) self.assert_(result.equals(expected)) result = midx_lv2.append(idx1) - expected = Index([(1.1, datetime.datetime(2010, 12, 31, 15, 0)), - (1.2, datetime.datetime(2011, 1, 1, 15, 0)), - (1.3, datetime.datetime(2011, 1, 2, 15, 0)), - 1.1, 1.2, 1.3]) + expected = Index(expected_tuples + [1.1, 1.2, 1.3]) self.assert_(result.equals(expected)) result = midx_lv2.append(midx_lv2) @@ -103,12 +105,10 @@ def test_append_index(self): result = midx_lv3.append(midx_lv2) expected = Index._simple_new( - np.array([(1.1, datetime.datetime(2010, 12, 31, 15, 0), 'A'), - (1.2, datetime.datetime(2011, 1, 1, 15, 0), 'B'), - (1.3, datetime.datetime(2011, 1, 2, 15, 0), 'C'), - (1.1, datetime.datetime(2010, 12, 31, 15, 0)), - (1.2, datetime.datetime(2011, 1, 1, 15, 0)), - (1.3, datetime.datetime(2011, 1, 2, 15, 0))]), None) + np.array([(1.1, tz.localize(datetime.datetime(2011, 1, 1)), 'A'), + (1.2, tz.localize(datetime.datetime(2011, 1, 2)), 'B'), + (1.3, tz.localize(datetime.datetime(2011, 1, 3)), 'C')] + + expected_tuples), None) self.assert_(result.equals(expected)) def test_dataframe_constructor(self): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 50296a417479e..ff585d80af830 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -6,15 +6,15 @@ import numpy as np -from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE, - is_list_like,_values_from_object, _maybe_box, - notnull, ABCSeries) -from pandas.core.index import Index, Int64Index, _Identity, 
Float64Index +from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE, + _values_from_object, _maybe_box, + ABCSeries) +from pandas.core.index import Index, Int64Index, Float64Index import pandas.compat as compat from pandas.compat import u from pandas.tseries.frequencies import ( infer_freq, to_offset, get_period_alias, - Resolution, get_reso_string, get_offset) + Resolution, get_reso_string) from pandas.core.base import DatetimeIndexOpsMixin from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay from pandas.tseries.tools import parse_time_string, normalize_date @@ -29,7 +29,6 @@ import pandas.algos as _algos import pandas.index as _index -from pandas.tslib import isleapyear def _utc(): import pytz @@ -452,8 +451,9 @@ def _generate(cls, start, end, periods, name, offset, return index - def _box_values(self, values): - return lib.map_infer(values, lib.Timestamp) + @property + def _box_func(self): + return lambda x: Timestamp(x, offset=self.offset, tz=self.tz) def _local_timestamps(self): utc = _utc() @@ -673,7 +673,7 @@ def _format_with_header(self, header, **kwargs): def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs): - data = self._get_object_index() + data = self.asobject from pandas.core.format import Datetime64Formatter return Datetime64Formatter(values=data, nat_rep=na_rep, @@ -778,27 +778,6 @@ def _to_embed(self, keep_tz=False): return self.asobject.values return self.values - @property - def asobject(self): - """ - Convert to Index of datetime objects - """ - if isnull(self).any(): - msg = 'DatetimeIndex with NaT cannot be converted to object' - raise ValueError(msg) - return self._get_object_index() - - def tolist(self): - """ - See ndarray.tolist - """ - return list(self.asobject) - - def _get_object_index(self): - boxfunc = lambda x: Timestamp(x, offset=self.offset, tz=self.tz) - boxed_values = lib.map_infer(self.asi8, boxfunc) - return Index(boxed_values, dtype=object, name=self.name) - def 
to_pydatetime(self): """ Return DatetimeIndex as object ndarray of datetime.datetime objects @@ -1515,7 +1494,7 @@ def normalize(self): tz=self.tz) def __iter__(self): - return iter(self._get_object_index()) + return iter(self.asobject) def searchsorted(self, key, side='left'): if isinstance(key, np.ndarray): diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index b3a29ab4110d7..d41438bbfd208 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -723,9 +723,9 @@ def __contains__(self, key): return False return key.ordinal in self._engine - def _box_values(self, values): - f = lambda x: Period(ordinal=x, freq=self.freq) - return lib.map_infer(values, f) + @property + def _box_func(self): + return lambda x: Period(ordinal=x, freq=self.freq) def asof_locs(self, where, mask): """ @@ -747,10 +747,6 @@ def asof_locs(self, where, mask): return result - @property - def asobject(self): - return Index(self._box_values(self.values), name=self.name, dtype=object) - def _array_values(self): return self.asobject @@ -854,12 +850,6 @@ def equals(self, other): return np.array_equal(self.asi8, other.asi8) - def tolist(self): - """ - Return a list of Period objects - """ - return self._get_object_array().tolist() - def to_timestamp(self, freq=None, how='start'): """ Cast to DatetimeIndex diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index af39bba8e43af..84c0c40de369a 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1448,18 +1448,6 @@ def test_to_timestamp_period_nat(self): self.assertTrue(result2.equals(index)) self.assertEqual(result2.name, 'idx') - def test_asobject_period_nat(self): - index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx') - - result = index.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assertTrue(isinstance(result[0], Period)) - self.assertEqual(result[0].ordinal, 
tslib.iNaT) - self.assertEqual(result[1], Period('2011-01', freq='M')) - self.assertEqual(result[2], Period('2011-02', freq='M')) - self.assertEqual(result.name, 'idx') - def test_as_frame_columns(self): rng = period_range('1/1/2000', periods=5) df = DataFrame(randn(10, 5), columns=rng) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 9eb8f9b30b957..11161308be279 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1,5 +1,5 @@ # pylint: disable-msg=E1101,W0612 -from datetime import datetime, time, timedelta, date +from datetime import datetime, time, timedelta import sys import operator @@ -2363,16 +2363,6 @@ def test_order(self): self.assertTrue(ordered[::-1].is_monotonic) self.assert_numpy_array_equal(dexer, [0, 2, 1]) - def test_asobject(self): - idx = date_range(start='2013-01-01', periods=4, freq='M', name='idx') - expected = Index([Timestamp('2013-01-31'), Timestamp('2013-02-28'), - Timestamp('2013-03-31'), Timestamp('2013-04-30')], - dtype=object, name='idx') - - result = idx.asobject - self.assertTrue(result.equals(expected)) - self.assertEqual(result.name, expected.name) - def test_insert(self): idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'], name='idx')