Skip to content

BUG: provide for automatic conversion of object -> datetime64 #2595

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,28 @@ def _consensus_name_attr(objs):
#----------------------------------------------------------------------
# Lots of little utilities

def _possibly_cast_to_datetime(value, dtype):
""" try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """

if dtype == 'M8[ns]':
if np.isscalar(value):
if value == tslib.iNaT or isnull(value):
value = tslib.iNaT
else:
value = np.array(value)

# have a scalar array-like (e.g. NaT)
if value.ndim == 0:
value = tslib.iNaT

# we have an array of datetime & nulls
elif np.prod(value.shape):
try:
value = tslib.array_to_datetime(value)
except:
pass

return value

def _infer_dtype(value):
if isinstance(value, (float, np.floating)):
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4218,7 +4218,13 @@ def applymap(self, func):
-------
applied : DataFrame
"""
return self.apply(lambda x: lib.map_infer(x, func))

# if we have a dtype == 'M8[ns]', provide boxed values
def infer(x):
if x.dtype == 'M8[ns]':
x = lib.map_infer(x, lib.Timestamp)
return lib.map_infer(x, func)
return self.apply(infer)

#----------------------------------------------------------------------
# Merging / joining methods
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,15 @@ def _setitem_with_indexer(self, indexer, value):
data = self.obj[item]
values = data.values
if np.prod(values.shape):
value = com._possibly_cast_to_datetime(value,getattr(data,'dtype',None))
values[plane_indexer] = value
except ValueError:
for item, v in zip(item_labels[het_idx], value):
data = self.obj[item]
values = data.values
if np.prod(values.shape):
values[plane_indexer] = v

else:
if isinstance(indexer, tuple):
indexer = _maybe_convert_ix(*indexer)
Expand Down
17 changes: 16 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ def get_values(self, dtype):
def make_block(values, items, ref_items):
dtype = values.dtype
vtype = dtype.type
klass = None

if issubclass(vtype, np.floating):
klass = FloatBlock
Expand All @@ -459,7 +460,21 @@ def make_block(values, items, ref_items):
klass = IntBlock
elif dtype == np.bool_:
klass = BoolBlock
else:

# try to infer a datetimeblock
if klass is None and np.prod(values.shape):
flat = values.flatten()
inferred_type = lib.infer_dtype(flat)
if inferred_type == 'datetime':

# we have an object array that has been inferred as datetime, so convert it
try:
values = tslib.array_to_datetime(flat).reshape(values.shape)
klass = DatetimeBlock
except: # it already object, so leave it
pass

if klass is None:
klass = ObjectBlock

return klass(values, items, ref_items, ndim=values.ndim)
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2983,12 +2983,13 @@ def _sanitize_array(data, index, dtype=None, copy=False,

def _try_cast(arr):
try:
subarr = np.array(data, dtype=dtype, copy=copy)
arr = com._possibly_cast_to_datetime(arr, dtype)
subarr = np.array(arr, dtype=dtype, copy=copy)
except (ValueError, TypeError):
if dtype is not None and raise_cast_failure:
raise
else: # pragma: no cover
subarr = np.array(data, dtype=object, copy=copy)
subarr = np.array(arr, dtype=object, copy=copy)
return subarr

# GH #846
Expand Down Expand Up @@ -3047,6 +3048,8 @@ def _try_cast(arr):
value, dtype = _dtype_from_scalar(value)
subarr = np.empty(len(index), dtype=dtype)
else:
# need to possibly convert the value here
value = com._possibly_cast_to_datetime(value, dtype)
subarr = np.empty(len(index), dtype=dtype)
subarr.fill(value)
else:
Expand Down
32 changes: 31 additions & 1 deletion pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import pandas.core.format as fmt
import pandas.core.datetools as datetools
from pandas.core.api import (DataFrame, Index, Series, notnull, isnull,
MultiIndex, DatetimeIndex)
MultiIndex, DatetimeIndex, Timestamp)
from pandas.io.parsers import read_csv

from pandas.util.testing import (assert_almost_equal,
Expand Down Expand Up @@ -1073,6 +1073,36 @@ def test_setitem_single_column_mixed(self):
expected = [nan, 'qux', nan, 'qux', nan]
assert_almost_equal(df['str'].values, expected)

def test_setitem_single_column_mixed_datetime(self):
    """Setting null-like values into a datetime64[ns] column of a mixed frame."""
    from pandas import tslib

    frame = DataFrame(randn(5, 3),
                      index=['a', 'b', 'c', 'd', 'e'],
                      columns=['foo', 'bar', 'baz'])
    frame['timestamp'] = Timestamp('20010102')

    # three float columns plus the freshly added datetime column
    assert_series_equal(frame.get_dtype_counts(),
                        Series({ 'float64' : 3, 'datetime64[ns]' : 1}))

    # both the integer NaT sentinel and a float nan may be assigned to a
    # single cell of the datetime column and must read back as null
    for label, missing in (('b', tslib.iNaT), ('c', nan)):
        frame.ix[label, 'timestamp'] = missing
        self.assert_(com.isnull(frame.ix[label, 'timestamp']))

    # assigning nan across a whole mixed row is allowed as well
    frame.ix['d', :] = nan
    # NOTE(review): the check below reads row 'c' although row 'd' was just
    # assigned -- confirm which row was meant
    self.assert_(com.isnull(frame.ix['c', :]).all() == False)

    # a list-like value for a single cell is rejected
    self.assertRaises(Exception, frame.ix.__setitem__,
                      ('d', 'timestamp'), [nan])

    # prior to 0.10.1 this failed
    #self.assertRaises(TypeError, df.ix.__setitem__, ('c','timestamp'), nan)

def test_setitem_frame(self):
piece = self.frame.ix[:2, ['A', 'B']]
self.frame.ix[-2:, ['A', 'B']] = piece.values
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,26 @@ def test_constructor_dtype_nocast(self):
s2[1] = 5
self.assertEquals(s[1], 5)

def test_constructor_dtype_datetime64(self):
    """Series construction with dtype='M8[ns]' from null-like inputs."""
    import pandas.tslib as tslib

    # a null-like scalar broadcast over an index gives an all-null series
    for fill in (tslib.iNaT, tslib.NaT, nan):
        ser = Series(fill, dtype='M8[ns]', index=range(5))
        self.assert_(isnull(ser).all() == True)

    # a list mixing a datetime with a null-like entry keeps the requested
    # dtype, and the null-like entry reads back as null
    for missing in (tslib.iNaT, nan):
        ser = Series([datetime(2001, 1, 2, 0, 0), missing], dtype='M8[ns]')
        self.assert_(isnull(ser[1]) == True)
        self.assert_(ser.dtype == 'M8[ns]')

def test_constructor_dict(self):
d = {'a' : 0., 'b' : 1., 'c' : 2.}
result = Series(d, index=['b', 'c', 'd', 'a'])
Expand Down
7 changes: 4 additions & 3 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1235,9 +1235,9 @@ def test_append_concat(self):

def test_set_dataframe_column_ns_dtype(self):
x = DataFrame([datetime.now(), datetime.now()])
self.assert_(x[0].dtype == object)
#self.assert_(x[0].dtype == object)

x[0] = to_datetime(x[0])
#x[0] = to_datetime(x[0])
self.assert_(x[0].dtype == np.dtype('M8[ns]'))

def test_groupby_count_dateparseerror(self):
Expand Down Expand Up @@ -2066,10 +2066,11 @@ def test_get_level_values_box(self):
def test_frame_apply_dont_convert_datetime64(self):
from pandas.tseries.offsets import BDay
df = DataFrame({'x1': [datetime(1996,1,1)]})

df = df.applymap(lambda x: x+BDay())
df = df.applymap(lambda x: x+BDay())

self.assertTrue(df.x1.dtype == object)
self.assertTrue(df.x1.dtype == 'M8[ns]')


class TestLegacyCompat(unittest.TestCase):
Expand Down