Skip to content

Commit 33f515a

Browse files
committed
ENH: to_datetime will convert array of strings and NAs to datetime64 with NaT, close #999
1 parent 270a10b commit 33f515a

File tree

5 files changed

+103
-50
lines changed

5 files changed

+103
-50
lines changed

pandas/core/api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from pandas.core.datetools import DateOffset
5+
from pandas.core.datetools import DateOffset, to_datetime
66
import pandas.core.datetools as datetools
77

88
from pandas.core.common import isnull, notnull, save, load

pandas/core/datetools.py

+5
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,15 @@ def to_datetime(arg, errors='ignore'):
8080
-------
8181
ret : datetime if parsing succeeded
8282
"""
83+
from pandas.core.series import Series
8384
if arg is None:
8485
return arg
8586
elif isinstance(arg, datetime):
8687
return arg
88+
elif isinstance(arg, Series):
89+
values = lib.string_to_datetime(com._ensure_object(arg.values),
90+
raise_=errors == 'raise')
91+
return Series(values, index=arg.index, name=arg.name)
8792
elif isinstance(arg, np.ndarray):
8893
return lib.string_to_datetime(com._ensure_object(arg),
8994
raise_=errors == 'raise')

pandas/src/datetime.pyx

+43
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,49 @@ cdef class DayOffset(_Offset):
613613
# offset.next()
614614
# return i
615615

616+
def string_to_datetime(ndarray[object] strings, raise_=False):
    """
    Convert an object array of date strings (and NAs) to datetimes.

    Null entries (as judged by ``util._checknull``) become ``NaT`` and
    datetime objects are stored as they are; every other entry is handed to
    ``dateutil.parser.parse``.

    Parameters
    ----------
    strings : ndarray[object]
        Values to convert.
    raise_ : bool, default False
        If True, re-raise the parsing exception for an entry that cannot be
        parsed instead of leaving the original value in the output.

    Returns
    -------
    result : ndarray
        An ``'M8[us]'`` array when every entry could be converted.
        Otherwise an object array holding the parsed value where parsing
        succeeded and the original value where it did not.
    """
    cdef:
        Py_ssize_t i, n = len(strings)
        object val
        ndarray[object] oresult

    from dateutil.parser import parse

    try:
        result = np.empty(n, dtype='M8[us]')
        for i in range(n):
            val = strings[i]
            if util._checknull(val):
                result[i] = NaT
            elif PyDateTime_Check(val):
                result[i] = val
            else:
                try:
                    result[i] = parse(val)
                except Exception:
                    # Any failure here abandons the datetime64 attempt and
                    # switches to the object-dtype fallback below.
                    raise TypeError
        return result
    except TypeError:
        oresult = np.empty(n, dtype=object)

        for i in range(n):
            val = strings[i]
            if util._checknull(val) or PyDateTime_Check(val):
                # Nulls and datetimes pass through untouched.  parse() is
                # meant for strings, so a datetime must not reach it:
                # with raise_=True that would raise before the genuinely
                # unparseable entry is ever looked at.
                oresult[i] = val
            else:
                try:
                    oresult[i] = parse(val)
                except Exception:
                    if raise_:
                        raise
                    oresult[i] = val

        return oresult
657+
658+
616659
# Conversion routines
617660
# ------------------------------------------------------------------------------
618661

pandas/src/tseries.pyx

-21
Original file line numberDiff line numberDiff line change
@@ -571,27 +571,6 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
571571

572572
return maybe_convert_bool(result)
573573

574-
def string_to_datetime(ndarray[object] strings, raise_=False):
575-
cdef:
576-
Py_ssize_t i, n = len(strings)
577-
object val
578-
from dateutil.parser import parse
579-
580-
result = np.empty(n, dtype=object)
581-
582-
for i in range(n):
583-
val = strings[i]
584-
if util._checknull(val):
585-
result[i] = val
586-
else:
587-
try:
588-
result[i] = parse(val)
589-
except Exception:
590-
if raise_:
591-
raise
592-
result[i] = val
593-
594-
return result
595574

596575
def value_count_int64(ndarray[int64_t] values):
597576
cdef:

pandas/tests/test_timeseries.py

+54-28
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pandas import (Index, Series, TimeSeries, DataFrame, isnull,
1212
date_range, Timestamp)
1313

14-
from pandas import DatetimeIndex
14+
from pandas import DatetimeIndex, to_datetime
1515

1616
from pandas.core.daterange import DateRange
1717

@@ -111,33 +111,6 @@ def assert_range_equal(left, right):
111111

112112
class TestTimeSeries(unittest.TestCase):
113113

114-
def test_string_na_conversion(self):
115-
from dateutil.parser import parse
116-
from pandas.core.datetools import to_datetime
117-
118-
strings = np.array(['1/1/2000', '1/2/2000', np.nan,
119-
'1/4/2000, 12:34:56'], dtype=object)
120-
121-
expected = []
122-
for val in strings:
123-
if com.isnull(val):
124-
expected.append(val)
125-
else:
126-
expected.append(parse(val))
127-
128-
result = lib.string_to_datetime(strings)
129-
assert_almost_equal(result, expected)
130-
131-
result2 = to_datetime(strings)
132-
assert_almost_equal(result, result2)
133-
134-
malformed = np.array(['1/100/2000', np.nan], dtype=object)
135-
result = to_datetime(malformed)
136-
assert_almost_equal(result, malformed)
137-
138-
self.assertRaises(ValueError, to_datetime, malformed,
139-
errors='raise')
140-
141114
def test_dti_slicing(self):
142115
dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M')
143116
dti2 = dti[[1,3,5]]
@@ -349,6 +322,59 @@ def test_fillna_nat(self):
349322
assert_frame_equal(filled, expected)
350323
assert_frame_equal(filled2, expected)
351324

325+
def test_string_na_nat_conversion(self):
    # GH #999, #858
    # to_datetime / string_to_datetime should turn strings and NAs into
    # datetime64 values, with NaT standing in for the missing entries.

    from dateutil.parser import parse
    from pandas.core.datetools import to_datetime

    strings = np.array(['1/1/2000', '1/2/2000', np.nan,
                        '1/4/2000, 12:34:56'], dtype=object)

    # Spell out the unit so the expectation uses the same 'M8[us]' dtype
    # as the Series expectation further down.
    expected = np.empty(4, dtype='M8[us]')
    for i, val in enumerate(strings):
        if com.isnull(val):
            expected[i] = NaT
        else:
            expected[i] = parse(val)

    result = lib.string_to_datetime(strings)
    assert_almost_equal(result, expected)

    result2 = to_datetime(strings)
    assert_almost_equal(result, result2)

    # Unparseable input comes back unchanged by default ...
    malformed = np.array(['1/100/2000', np.nan], dtype=object)
    result = to_datetime(malformed)
    assert_almost_equal(result, malformed)

    # ... and raises when errors='raise' is requested.
    self.assertRaises(ValueError, to_datetime, malformed,
                      errors='raise')

    # Series input: once holding strings, once holding already-converted
    # datetimes; both carry NAs, a custom index and a name.
    idx = ['a', 'b', 'c', 'd', 'e']
    series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan,
                     '1/5/2000'], index=idx, name='foo')
    dseries = Series([to_datetime('1/1/2000'), np.nan,
                      to_datetime('1/3/2000'), np.nan,
                      to_datetime('1/5/2000')], index=idx, name='foo')

    result = to_datetime(series)
    dresult = to_datetime(dseries)

    expected = Series(np.empty(5, dtype='M8[us]'), index=idx)
    for i in range(5):
        x = series[i]
        if isnull(x):
            expected[i] = NaT
        else:
            expected[i] = to_datetime(x)

    # assertEqual replaces the deprecated assertEquals alias.
    assert_series_equal(result, expected)
    self.assertEqual(result.name, 'foo')

    assert_series_equal(dresult, expected)
    self.assertEqual(dresult.name, 'foo')
377+
352378
def _skip_if_no_pytz():
353379
try:
354380
import pytz

0 commit comments

Comments
 (0)