Cast null-like values to datetime64[ns] when constructing Series and when
setting into mixed-type frames.

  * core/common.py: add _possibly_cast_to_datetime(value, dtype).  For
    dtype 'M8[ns]' a null scalar becomes tslib.iNaT and a non-empty
    array-like is passed through tslib.array_to_datetime; if that
    conversion fails the value is left as the plain ndarray.
  * core/frame.py: applymap maps 'M8[ns]' columns through lib.Timestamp
    before calling the user function.
  * core/indexing.py: _setitem_with_indexer casts the value against the
    target column's dtype before assigning.
  * core/internals.py: make_block tries to convert arrays inferred as
    'datetime' into a DatetimeBlock, falling back to ObjectBlock.
  * core/series.py: _sanitize_array casts both array and scalar inputs.

Note: the two `except` clauses introduced below name Exception explicitly
instead of being bare, so the post-image blob ids on the matching `index`
lines are informational only.

diff --git a/pandas/core/common.py b/pandas/core/common.py
index 15f6cb6412c78..c7a0dd6c6e179 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -628,6 +628,28 @@ def _consensus_name_attr(objs):
 #----------------------------------------------------------------------
 # Lots of little utilities
 
+def _possibly_cast_to_datetime(value, dtype):
+    """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """
+
+    if dtype == 'M8[ns]':
+        if np.isscalar(value):
+            if value == tslib.iNaT or isnull(value):
+                value = tslib.iNaT
+        else:
+            value = np.array(value)
+
+            # have a scalar array-like (e.g. NaT)
+            if value.ndim == 0:
+                value = tslib.iNaT
+
+            # we have an array of datetime & nulls
+            elif np.prod(value.shape):
+                try:
+                    value = tslib.array_to_datetime(value)
+                except Exception:
+                    pass
+
+    return value
 
 def _infer_dtype(value):
     if isinstance(value, (float, np.floating)):
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b3fef8943baf3..49b667b7f0bb4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4218,7 +4218,13 @@ def applymap(self, func):
         -------
         applied : DataFrame
         """
-        return self.apply(lambda x: lib.map_infer(x, func))
+
+        # if we have a dtype == 'M8[ns]', provide boxed values
+        def infer(x):
+            if x.dtype == 'M8[ns]':
+                x = lib.map_infer(x, lib.Timestamp)
+            return lib.map_infer(x, func)
+        return self.apply(infer)
 
     #----------------------------------------------------------------------
     # Merging / joining methods
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index f8c977a3b9015..53eb18c12f172 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -111,6 +111,7 @@ def _setitem_with_indexer(self, indexer, value):
                     data = self.obj[item]
                     values = data.values
                     if np.prod(values.shape):
+                        value = com._possibly_cast_to_datetime(value,getattr(data,'dtype',None))
                         values[plane_indexer] = value
             except ValueError:
                 for item, v in zip(item_labels[het_idx], value):
@@ -118,6 +119,7 @@ def _setitem_with_indexer(self, indexer, value):
                     values = data.values
                     if np.prod(values.shape):
                         values[plane_indexer] = v
+
         else:
             if isinstance(indexer, tuple):
                 indexer = _maybe_convert_ix(*indexer)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 639141e4edba6..57844656bf113 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -446,6 +446,7 @@ def get_values(self, dtype):
 def make_block(values, items, ref_items):
     dtype = values.dtype
     vtype = dtype.type
+    klass = None
 
     if issubclass(vtype, np.floating):
         klass = FloatBlock
@@ -459,7 +460,21 @@ def make_block(values, items, ref_items):
         klass = IntBlock
     elif dtype == np.bool_:
         klass = BoolBlock
-    else:
+
+    # try to infer a datetimeblock
+    if klass is None and np.prod(values.shape):
+        flat = values.flatten()
+        inferred_type = lib.infer_dtype(flat)
+        if inferred_type == 'datetime':
+
+            # we have an object array that has been inferred as datetime, so convert it
+            try:
+                values = tslib.array_to_datetime(flat).reshape(values.shape)
+                klass = DatetimeBlock
+            except Exception:  # it is already object dtype, so leave it
+                pass
+
+    if klass is None:
         klass = ObjectBlock
 
     return klass(values, items, ref_items, ndim=values.ndim)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 6cf511d32bfb3..7ffdc1051ee63 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2983,12 +2983,13 @@ def _sanitize_array(data, index, dtype=None, copy=False,
 
     def _try_cast(arr):
         try:
-            subarr = np.array(data, dtype=dtype, copy=copy)
+            arr = com._possibly_cast_to_datetime(arr, dtype)
+            subarr = np.array(arr, dtype=dtype, copy=copy)
         except (ValueError, TypeError):
             if dtype is not None and raise_cast_failure:
                 raise
             else:  # pragma: no cover
-                subarr = np.array(data, dtype=object, copy=copy)
+                subarr = np.array(arr, dtype=object, copy=copy)
         return subarr
 
     # GH #846
@@ -3047,6 +3048,8 @@ def _try_cast(arr):
                 value, dtype = _dtype_from_scalar(value)
                 subarr = np.empty(len(index), dtype=dtype)
             else:
+                # need to possibly convert the value here
+                value = com._possibly_cast_to_datetime(value, dtype)
                 subarr = np.empty(len(index), dtype=dtype)
             subarr.fill(value)
         else:
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index cf485f70ffbc8..462812296c9da 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -21,7 +21,7 @@
 import pandas.core.format as fmt
 import pandas.core.datetools as datetools
 from pandas.core.api import (DataFrame, Index, Series, notnull, isnull,
-                             MultiIndex, DatetimeIndex)
+                             MultiIndex, DatetimeIndex, Timestamp)
 from pandas.io.parsers import read_csv
 
 from pandas.util.testing import (assert_almost_equal,
@@ -1073,6 +1073,36 @@ def test_setitem_single_column_mixed(self):
         expected = [nan, 'qux', nan, 'qux', nan]
         assert_almost_equal(df['str'].values, expected)
 
+    def test_setitem_single_column_mixed_datetime(self):
+        df = DataFrame(randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],
+                       columns=['foo', 'bar', 'baz'])
+
+        df['timestamp'] = Timestamp('20010102')
+
+        # check our dtypes
+        result = df.get_dtype_counts()
+        expected = Series({ 'float64' : 3, 'datetime64[ns]' : 1})
+        assert_series_equal(result, expected)
+
+        # set an allowable datetime64 type
+        from pandas import tslib
+        df.ix['b','timestamp'] = tslib.iNaT
+        self.assert_(com.isnull(df.ix['b','timestamp']))
+
+        # allow this syntax
+        df.ix['c','timestamp'] = nan
+        self.assert_(com.isnull(df.ix['c','timestamp']))
+
+        # allow this syntax
+        df.ix['d',:] = nan
+        self.assert_(com.isnull(df.ix['c',:]).all() == False)
+
+        # try to set with a list like item
+        self.assertRaises(Exception, df.ix.__setitem__, ('d','timestamp'), [nan])
+
+        # prior to 0.10.1 this failed
+        #self.assertRaises(TypeError, df.ix.__setitem__, ('c','timestamp'), nan)
+
     def test_setitem_frame(self):
         piece = self.frame.ix[:2, ['A', 'B']]
         self.frame.ix[-2:, ['A', 'B']] = piece.values
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index 1b0065c18923f..111b1e69bb823 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -351,6 +351,26 @@ def test_constructor_dtype_nocast(self):
         s2[1] = 5
         self.assertEquals(s[1], 5)
 
+    def test_constructor_dtype_datetime64(self):
+        import pandas.tslib as tslib
+
+        s = Series(tslib.iNaT,dtype='M8[ns]',index=range(5))
+        self.assert_(isnull(s).all() == True)
+
+        s = Series(tslib.NaT,dtype='M8[ns]',index=range(5))
+        self.assert_(isnull(s).all() == True)
+
+        s = Series(nan,dtype='M8[ns]',index=range(5))
+        self.assert_(isnull(s).all() == True)
+
+        s = Series([ datetime(2001,1,2,0,0), tslib.iNaT ],dtype='M8[ns]')
+        self.assert_(isnull(s[1]) == True)
+        self.assert_(s.dtype == 'M8[ns]')
+
+        s = Series([ datetime(2001,1,2,0,0), nan ],dtype='M8[ns]')
+        self.assert_(isnull(s[1]) == True)
+        self.assert_(s.dtype == 'M8[ns]')
+
     def test_constructor_dict(self):
         d = {'a' : 0., 'b' : 1., 'c' : 2.}
         result = Series(d, index=['b', 'c', 'd', 'a'])
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
index 855bbd02489bb..60b2e989ea683 100644
--- a/pandas/tseries/tests/test_timeseries.py
+++ b/pandas/tseries/tests/test_timeseries.py
@@ -1235,9 +1235,9 @@ def test_append_concat(self):
 
     def test_set_dataframe_column_ns_dtype(self):
         x = DataFrame([datetime.now(), datetime.now()])
-        self.assert_(x[0].dtype == object)
+        #self.assert_(x[0].dtype == object)
 
-        x[0] = to_datetime(x[0])
+        #x[0] = to_datetime(x[0])
         self.assert_(x[0].dtype == np.dtype('M8[ns]'))
 
     def test_groupby_count_dateparseerror(self):
@@ -2066,10 +2066,11 @@ def test_get_level_values_box(self):
     def test_frame_apply_dont_convert_datetime64(self):
         from pandas.tseries.offsets import BDay
         df = DataFrame({'x1': [datetime(1996,1,1)]})
+
         df = df.applymap(lambda x: x+BDay())
         df = df.applymap(lambda x: x+BDay())
 
-        self.assertTrue(df.x1.dtype == object)
+        self.assertTrue(df.x1.dtype == 'M8[ns]')
 
 
 class TestLegacyCompat(unittest.TestCase):