@@ -20,6 +20,7 @@ except:
20
20
except :
21
21
from _dummy_thread import allocate_lock as _thread_allocate_lock
22
22
23
+ import pytz
23
24
24
25
from cython cimport Py_ssize_t
25
26
from cpython cimport PyFloat_Check
@@ -40,6 +41,27 @@ from util cimport is_string_object
40
41
from nattype cimport checknull_with_nat, NPY_NAT
41
42
from nattype import nat_strings
42
43
44
# Integer code for each supported strptime directive letter.
# array_strptime branches on these codes (for example 17 handles %Z and
# 19 handles %z), so the numbers must stay in sync with those branches.
cdef dict _parse_code_table = {
    'y': 0,
    'Y': 1,
    'm': 2,
    'B': 3,
    'b': 4,
    'd': 5,
    'H': 6,
    'I': 7,
    'M': 8,
    'S': 9,
    'f': 10,
    'A': 11,
    'a': 12,
    'w': 13,
    'j': 14,
    'U': 15,
    'W': 16,
    'Z': 17,
    'p': 18,  # an additional key, only with I
    'z': 19,
}
64
+
43
65
44
66
def array_strptime (ndarray[object] values , object fmt ,
45
67
bint exact = True , errors = ' raise' ):
@@ -58,15 +80,15 @@ def array_strptime(ndarray[object] values, object fmt,
58
80
Py_ssize_t i, n = len (values)
59
81
pandas_datetimestruct dts
60
82
ndarray[int64_t] iresult
61
- int year, month, day, minute, hour, second, weekday, julian, tz
62
- int week_of_year, week_of_year_start
83
+ ndarray[object ] result_timezone
84
+ int year, month, day, minute, hour, second, weekday, julian
85
+ int week_of_year, week_of_year_start, parse_code, ordinal
63
86
int64_t us, ns
64
- object val, group_key, ampm, found
87
+ object val, group_key, ampm, found, timezone
65
88
dict found_key
66
89
bint is_raise = errors== ' raise'
67
90
bint is_ignore = errors== ' ignore'
68
91
bint is_coerce = errors== ' coerce'
69
- int ordinal
70
92
71
93
assert is_raise or is_ignore or is_coerce
72
94
@@ -79,6 +101,8 @@ def array_strptime(ndarray[object] values, object fmt,
79
101
in fmt):
80
102
raise ValueError (" Cannot use '%W ' or '%U ' without "
81
103
" day and year" )
104
+ elif ' %Z ' in fmt and ' %z ' in fmt:
105
+ raise ValueError (" Cannot parse both %Z and %z " )
82
106
83
107
global _TimeRE_cache, _regex_cache
84
108
with _cache_lock:
@@ -108,32 +132,10 @@ def array_strptime(ndarray[object] values, object fmt,
108
132
109
133
result = np.empty(n, dtype = ' M8[ns]' )
110
134
iresult = result.view(' i8' )
135
+ result_timezone = np.empty(n, dtype = ' object' )
111
136
112
137
dts.us = dts.ps = dts.as = 0
113
138
114
- cdef dict _parse_code_table = {
115
- ' y' : 0 ,
116
- ' Y' : 1 ,
117
- ' m' : 2 ,
118
- ' B' : 3 ,
119
- ' b' : 4 ,
120
- ' d' : 5 ,
121
- ' H' : 6 ,
122
- ' I' : 7 ,
123
- ' M' : 8 ,
124
- ' S' : 9 ,
125
- ' f' : 10 ,
126
- ' A' : 11 ,
127
- ' a' : 12 ,
128
- ' w' : 13 ,
129
- ' j' : 14 ,
130
- ' U' : 15 ,
131
- ' W' : 16 ,
132
- ' Z' : 17 ,
133
- ' p' : 18 # just an additional key, works only with I
134
- }
135
- cdef int parse_code
136
-
137
139
for i in range (n):
138
140
val = values[i]
139
141
if is_string_object(val):
@@ -176,7 +178,7 @@ def array_strptime(ndarray[object] values, object fmt,
176
178
year = 1900
177
179
month = day = 1
178
180
hour = minute = second = ns = us = 0
179
- tz = - 1
181
+ timezone = None
180
182
# Default to -1 to signify that values not known; not critical to have,
181
183
# though
182
184
week_of_year = - 1
@@ -266,21 +268,10 @@ def array_strptime(ndarray[object] values, object fmt,
266
268
# W starts week on Monday.
267
269
week_of_year_start = 0
268
270
elif parse_code == 17 :
269
- # Since -1 is default value only need to worry about setting tz
270
- # if it can be something other than -1.
271
- found_zone = found_dict[' Z' ].lower()
272
- for value, tz_values in enumerate (locale_time.timezone):
273
- if found_zone in tz_values:
274
- # Deal w/ bad locale setup where timezone names are the
275
- # same and yet time.daylight is true; too ambiguous to
276
- # be able to tell what timezone has daylight savings
277
- if (time.tzname[0 ] == time.tzname[1 ] and
278
- time.daylight and found_zone not in (
279
- " utc" , " gmt" )):
280
- break
281
- else :
282
- tz = value
283
- break
271
+ timezone = pytz.timezone(found_dict[' Z' ])
272
+ elif parse_code == 19 :
273
+ timezone = parse_timezone_directive(found_dict[' z' ])
274
+
284
275
# If we know the wk of the year and what day of that wk, we can figure
285
276
# out the Julian day of the year.
286
277
if julian == - 1 and week_of_year != - 1 and weekday != - 1 :
@@ -330,7 +321,9 @@ def array_strptime(ndarray[object] values, object fmt,
330
321
continue
331
322
raise
332
323
333
- return result
324
+ result_timezone[i] = timezone
325
+
326
+ return result, result_timezone
334
327
335
328
336
329
""" _getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
@@ -538,14 +531,13 @@ class TimeRE(dict):
538
531
# XXX: Does 'Y' need to worry about having less or more than
539
532
# 4 digits?
540
533
' Y' : r " ( ?P<Y> \d\d\d\d ) " ,
534
+ ' z' : r " ( ?P<z> [+- ]\d\d :? [0-5 ]\d ( :? [0-5 ]\d ( \. \d {1,6} ) ? ) ? | Z) " ,
541
535
' A' : self .__seqToRE(self .locale_time.f_weekday, ' A' ),
542
536
' a' : self .__seqToRE(self .locale_time.a_weekday, ' a' ),
543
537
' B' : self .__seqToRE(self .locale_time.f_month[1 :], ' B' ),
544
538
' b' : self .__seqToRE(self .locale_time.a_month[1 :], ' b' ),
545
539
' p' : self .__seqToRE(self .locale_time.am_pm, ' p' ),
546
- ' Z' : self .__seqToRE([tz for tz_names in self .locale_time.timezone
547
- for tz in tz_names],
548
- ' Z' ),
540
+ ' Z' : self .__seqToRE(pytz.all_timezones, ' Z' ),
549
541
' %' : ' %' })
550
542
base.__setitem__ (' W' , base.__getitem__ (' U' ).replace(' U' , ' W' ))
551
543
base.__setitem__ (' c' , self .pattern(self .locale_time.LC_date_time))
@@ -632,3 +624,50 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year,
632
624
else :
633
625
days_to_week = week_0_length + (7 * (week_of_year - 1 ))
634
626
return 1 + days_to_week + day_of_week
627
+
628
cdef parse_timezone_directive(object z):
    """
    Parse the '%z' directive and return a pytz.FixedOffset

    Parameters
    ----------
    z : string of the UTC offset
        Either the literal 'Z' (treated as a zero offset) or a signed
        offset such as '+HHMM', '+HH:MM', '+HHMMSS', '+HH:MM:SS' or
        '+HH:MM:SS.ffffff'.

    Returns
    -------
    pytz.FixedOffset
        The offset expressed in whole minutes, negative when `z` starts
        with '-'.

    Raises
    ------
    ValueError
        If a colon separates hours from minutes but not minutes from
        seconds, or if any numeric field cannot be converted by int().

    Notes
    -----
    This is essentially similar to the cpython implementation
    https://github.com/python/cpython/blob/master/Lib/_strptime.py#L457-L479

    Seconds and fractional seconds are parsed (and therefore validated) but
    are floor-divided down to whole minutes, so any value below one minute
    contributes nothing to the returned offset.
    """

    cdef:
        int hours, minutes, seconds, pad_number, microseconds
        int total_minutes
        object gmtoff_remainder, gmtoff_remainder_padding, original

    if z == 'Z':
        return pytz.FixedOffset(0)

    # Keep the untouched input so the error message shows what the caller
    # actually passed rather than the colon-stripped working copy.
    original = z

    # Normalise '+HH:MM[:SS[.f]]' to '+HHMM[SS[.f]]' so that the fixed
    # slices below work for both the colon and the colon-free spellings.
    if z[3] == ':':
        z = z[:3] + z[4:]
        if len(z) > 5:
            if z[5] != ':':
                msg = "Inconsistent use of : in {0}"
                raise ValueError(msg.format(original))
            z = z[:5] + z[6:]

    # z[0] is the sign; it is applied after the magnitude is computed.
    hours = int(z[1:3])
    minutes = int(z[3:5])
    seconds = int(z[5:7] or 0)

    # Pad to always return microseconds.
    # z[7] is the '.', so the fractional digits (if any) start at index 8.
    gmtoff_remainder = z[8:]
    pad_number = 6 - len(gmtoff_remainder)
    gmtoff_remainder_padding = "0" * pad_number
    microseconds = int(gmtoff_remainder + gmtoff_remainder_padding)

    # Explicit floor division keeps the total an integer regardless of the
    # division semantics the module is compiled with.
    total_minutes = ((hours * 60) + minutes + (seconds // 60) +
                     (microseconds // 60000000))
    total_minutes = -total_minutes if z.startswith("-") else total_minutes
    return pytz.FixedOffset(total_minutes)
0 commit comments