Skip to content

Commit 4cafeb0

Browse files
committed
ENH: handling of dateutil-generated UTC datetime objects in to_datetime close #1693
1 parent 0c72d8b commit 4cafeb0

File tree

5 files changed

+43 -12 lines changed

RELEASE.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ pandas 0.8.2
3030
**Improvements to existing features**
3131

3232
- Add ``flags`` option for ``re.compile`` in some Series.str methods (#1659)
33+
- Parsing of UTC date strings in read_* functions (#1693)
3334

3435
**API Changes**
3536

pandas/io/tests/test_parsers.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1388,6 +1388,16 @@ def test_file(self):
13881388
url_table = read_table('file://localhost/'+localtable)
13891389
assert_frame_equal(url_table, local_table)
13901390

1391+
def test_parse_tz_aware(self):
1392+
import pytz
1393+
# #1693
1394+
data = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")
1395+
1396+
# it works
1397+
result = read_csv(data, index_col=0, parse_dates=True)
1398+
stamp = result.index[0]
1399+
self.assert_(stamp.minute == 39)
1400+
self.assert_(result.index.tz is pytz.utc)
13911401

13921402
class TestParseSQL(unittest.TestCase):
13931403

pandas/src/datetime.pyx

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None):
3838
ndarray[object] result = np.empty(n, dtype=object)
3939

4040
if tz is not None:
41-
if tz is pytz.utc:
41+
if _is_utc(tz):
4242
for i in range(n):
4343
pandas_datetime_to_datetimestruct(arr[i], PANDAS_FR_ns, &dts)
4444
result[i] = datetime(dts.year, dts.month, dts.day, dts.hour,
@@ -529,7 +529,7 @@ cpdef convert_to_tsobject(object ts, object tz=None):
529529
else:
530530
obj.value = _pydatetime_to_dts(ts, &obj.dts)
531531
obj.tzinfo = ts.tzinfo
532-
if obj.tzinfo is not None:
532+
if obj.tzinfo is not None and not _is_utc(obj.tzinfo):
533533
obj.value -= _delta_to_nanoseconds(obj.tzinfo._utcoffset)
534534
_check_dts_bounds(obj.value, &obj.dts)
535535
return obj
@@ -543,7 +543,7 @@ cpdef convert_to_tsobject(object ts, object tz=None):
543543
_check_dts_bounds(obj.value, &obj.dts)
544544

545545
if tz is not None:
546-
if tz is pytz.utc:
546+
if _is_utc(tz):
547547
obj.tzinfo = tz
548548
else:
549549
# Adjust datetime64 timestamp, recompute datetimestruct
@@ -558,6 +558,9 @@ cpdef convert_to_tsobject(object ts, object tz=None):
558558

559559
return obj
560560

561+
cdef inline bint _is_utc(object tz):
562+
return tz is UTC or isinstance(tz, _du_utc)
563+
561564
cdef int64_t _NS_LOWER_BOUND = -9223285636854775809LL
562565
cdef int64_t _NS_UPPER_BOUND = -9223372036854775807LL
563566

@@ -788,6 +791,7 @@ def i8_to_pydt(int64_t i8, object tzinfo = None):
788791
# time zone conversion helpers
789792

790793
try:
794+
from dateutil.tz import tzutc as _du_utc
791795
import pytz
792796
UTC = pytz.utc
793797
have_pytz = True
@@ -937,7 +941,7 @@ def tz_localize_check(ndarray[int64_t] vals, object tz):
937941
if not have_pytz:
938942
raise Exception("Could not find pytz module")
939943

940-
if tz == pytz.utc or tz is None:
944+
if tz == UTC or tz is None:
941945
return
942946

943947
trans = _get_transitions(tz)
@@ -984,7 +988,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz):
984988
if not have_pytz:
985989
raise Exception("Could not find pytz module")
986990

987-
if tz == pytz.utc or tz is None:
991+
if tz == UTC or tz is None:
988992
return vals
989993

990994
trans = _get_transitions(tz) # transition dates

pandas/tseries/index.py

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -213,18 +213,27 @@ def __new__(cls, data=None,
213213
else:
214214
subarr = data.view(_NS_DTYPE)
215215
else:
216-
subarr = tools.to_datetime(data)
216+
try:
217+
subarr = tools.to_datetime(data)
218+
except ValueError:
219+
# tz aware
220+
subarr = tools.to_datetime(data, utc=True)
221+
217222
if not np.issubdtype(subarr.dtype, np.datetime64):
218223
raise TypeError('Unable to convert %s to datetime dtype'
219224
% str(data))
220225

221-
if tz is not None:
222-
tz = tools._maybe_get_tz(tz)
223-
# Convert local to UTC
224-
ints = subarr.view('i8')
226+
if isinstance(subarr, DatetimeIndex):
227+
if tz is None:
228+
tz = subarr.tz
229+
else:
230+
if tz is not None:
231+
tz = tools._maybe_get_tz(tz)
232+
# Convert local to UTC
233+
ints = subarr.view('i8')
225234

226-
subarr = lib.tz_localize_to_utc(ints, tz)
227-
subarr = subarr.view(_NS_DTYPE)
235+
subarr = lib.tz_localize_to_utc(ints, tz)
236+
subarr = subarr.view(_NS_DTYPE)
228237

229238
subarr = subarr.view(cls)
230239
subarr.name = name

pandas/tseries/tests/test_timezones.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -369,6 +369,13 @@ def test_convert_tz_aware_datetime_datetime(self):
369369
self.assert_(np.array_equal(converted.asi8, ex_vals))
370370
self.assert_(converted.tz is pytz.utc)
371371

372+
def test_to_datetime_utc(self):
373+
from dateutil.parser import parse
374+
arr = np.array([parse('2012-06-13T01:39:00Z')], dtype=object)
375+
376+
result = to_datetime(arr, utc=True)
377+
self.assert_(result.tz is pytz.utc)
378+
372379
class TestTimeZones(unittest.TestCase):
373380

374381
def setUp(self):

0 commit comments

Comments
 (0)