Skip to content

Commit 9f38bda

Browse files
committed
Address comments
1 parent 0f1b1ab commit 9f38bda

File tree

3 files changed

+121
-110
lines changed

3 files changed

+121
-110
lines changed

pandas/_libs/tslibs/strptime.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -682,7 +682,7 @@ cdef _parse_timezone_directive(object z):
682682
if len(z) > 5:
683683
if z[5] != ':':
684684
msg = "Inconsistent use of : in {0}"
685-
raise ValueError(msg.format(found_dict['z']))
685+
raise ValueError(msg.format(z))
686686
z = z[:5] + z[6:]
687687
hours = int(z[1:3])
688688
minutes = int(z[3:5])

pandas/core/tools/datetimes.py

+68-59
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,70 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
106106
return result.values
107107

108108

109+
def _return_parsed_timezone_results(result, tznames, tzoffsets, parsing_tzname,
                                    parsing_tzoffset, box):
    """
    Return results from array_strptime if a %z or %Z directive was passed.

    If only %Z is parsed, each Timestamp is built with the parsed timezone
    name passed as ``tz``.
    If only %z is parsed, each Timestamp is localized to a pytz.FixedOffset.
    If both %Z and %z are parsed, each Timestamp carries a datetime.timezone
    built from the parsed offset and name (Python 3 only).

    Parameters
    ----------
    result : ndarray
        int64 date representations of the dates
    tznames : ndarray
        strings of timezone names if %Z is parsed
    tzoffsets : ndarray
        timedelta objects of the timezone offset if %z is parsed
    parsing_tzname : boolean
        True if %Z is parsed
    parsing_tzoffset : boolean
        True if %z is parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray

    Returns
    -------
    tz_result : parsed dates with timezone
        - Index-like if box=True
        - object ndarray of Timestamps if box=False

    Raises
    ------
    ValueError
        If both %Z and %z are parsed on Python 2, where datetime.timezone
        is not available.

    Notes
    -----
    At least one of ``parsing_tzname`` / ``parsing_tzoffset`` is expected to
    be True; no result is built when both are False.
    """
    if parsing_tzname and not parsing_tzoffset:
        stamps = [tslib.Timestamp(res, tz=tz)
                  for res, tz in zip(result, tznames)]
    elif parsing_tzoffset and not parsing_tzname:
        stamps = []
        for res, offset in zip(result, tzoffsets):
            # pytz.FixedOffset is specified in minutes
            offset_mins = offset.total_seconds() / 60
            tzoffset = pytz.FixedOffset(offset_mins)
            stamps.append(tslib.Timestamp(res).tz_localize(tzoffset))
    elif parsing_tzoffset and parsing_tzname:
        if not PY3:
            # datetime.timezone only exists on Python 3
            raise ValueError("Parsing tzoffsets are not "
                             "supported in Python 2")
        # imported lazily because the name is missing on Python 2
        from datetime import timezone
        stamps = []
        for res, offset, tzname in zip(result, tzoffsets, tznames):
            # Do we need to validate these timezones?
            # e.g. UTC / +0100
            tzinfo = timezone(offset, tzname)
            stamps.append(tslib.Timestamp(res, tzinfo=tzinfo))
    # dtype=object keeps an object array even when there are no results
    # (a bare np.array([]) would silently become float64)
    tz_results = np.array(stamps, dtype=object)
    if box:
        from pandas import Index
        return Index(tz_results)
    return tz_results
171+
172+
109173
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
110174
utc=None, box=True, format=None, exact=True,
111175
unit=None, infer_datetime_format=False, origin='unix',
@@ -355,65 +419,10 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
355419
"information.")
356420
result, tznames, tzoffsets = array_strptime(
357421
arg, format, exact=exact, errors=errors)
358-
if parsing_tzname and not parsing_tzoffset:
359-
if len(set(tznames)) == 1:
360-
tz = tznames[0]
361-
if box:
362-
result = DatetimeIndex(result,
363-
tz=tz,
364-
name=name)
365-
else:
366-
stamps = [tslib.Timestamp(res, tz=tz)
367-
for res in result]
368-
result = np.array(stamps, dtype=object)
369-
else:
370-
stamps = [tslib.Timestamp(res, tz=tz)
371-
for res, tz in zip(result, tznames)]
372-
result = np.array(stamps, dtype=object)
373-
return result
374-
elif parsing_tzoffset and not parsing_tzname:
375-
# Should we convert these to pytz.FixedOffsets
376-
# or datetime.timezones?
377-
if len(set(tzoffsets)) == 1:
378-
offset_mins = tzoffsets[0].total_seconds() / 60
379-
tzoffset = pytz.FixedOffset(offset_mins)
380-
if box:
381-
result = DatetimeIndex(result,
382-
tz=tzoffset,
383-
name=name)
384-
else:
385-
stamps = []
386-
for res, offset in zip(result, tzoffsets):
387-
ts = tslib.Timestamp(res)
388-
ts = ts.tz_localize(tzoffset)
389-
stamps.append(ts)
390-
result = np.array(stamps, dtype=object)
391-
else:
392-
stamps = []
393-
for res, offset in zip(result, tzoffsets):
394-
offset_mins = offset.total_seconds() / 60
395-
tzoffset = pytz.FixedOffset(offset_mins)
396-
ts = tslib.Timestamp(res)
397-
ts = ts.tz_localize(tzoffset)
398-
stamps.append(ts)
399-
result = np.array(stamps, dtype=object)
400-
return result
401-
elif parsing_tzoffset and parsing_tzname:
402-
if not PY3:
403-
raise ValueError("Parsing tzoffsets are not "
404-
"not supported in Python 3")
405-
from datetime import timezone
406-
stamps = []
407-
for res, offset, tzname in zip(result, tzoffsets,
408-
tznames):
409-
# Do we need to validate these timezones?
410-
# e.g. UTC / +0100
411-
tzinfo = timezone(offset, tzname)
412-
ts = tslib.Timestamp(res, tzinfo=tzinfo)
413-
stamps.append(ts)
414-
result = np.array(stamps, dtype=object)
415-
return result
416-
422+
if parsing_tzname or parsing_tzoffset:
423+
return _return_parsed_timezone_results(
424+
result, tznames, tzoffsets, parsing_tzname,
425+
parsing_tzoffset, box)
417426
except tslib.OutOfBoundsDatetime:
418427
if errors == 'raise':
419428
raise

pandas/tests/indexes/datetimes/test_tools.py

+52-50
Original file line numberDiff line numberDiff line change
@@ -186,63 +186,65 @@ def test_to_datetime_format_weeks(self, cache):
186186
for s, format, dt in data:
187187
assert to_datetime(s, format=format, cache=cache) == dt
188188

189-
@pytest.mark.skipif(not PY3,
190-
reason="datetime.timezone not supported in PY2")
191-
def test_to_datetime_parse_timezone(self):
192-
# %Z parsing only
193-
fmt = '%Y-%m-%d %H:%M:%S %Z'
194-
dates = ['2010-01-01 12:00:00 UTC'] * 2
195-
result = pd.to_datetime(dates, format=fmt)
196-
expected_dates = [pd.Timestamp('2010-01-01 12:00:00', tz='UTC')] * 2
197-
expected = pd.DatetimeIndex(expected_dates)
198-
tm.assert_index_equal(result, expected)
199-
200-
result = pd.to_datetime(dates, format=fmt, box=False)
201-
expected = np.array(expected_dates, dtype=object)
202-
tm.assert_numpy_array_equal(result, expected)
203-
204-
dates = ['2010-01-01 12:00:00 UTC', '2010-01-01 12:00:00 GMT']
205-
result = pd.to_datetime(dates, format=fmt)
206-
expected_dates = [pd.Timestamp('2010-01-01 12:00:00', tz='UTC'),
207-
pd.Timestamp('2010-01-01 12:00:00', tz='GMT')]
208-
expected = np.array(expected_dates, dtype=object)
209-
tm.assert_numpy_array_equal(result, expected)
210-
211-
# %z parsing only
212-
dates = ['2010-01-01 12:00:00 +0100'] * 2
213-
fmt = '%Y-%m-%d %H:%M:%S %z'
214-
result = pd.to_datetime(dates, format=fmt)
215-
expected_dates = [pd.Timestamp('2010-01-01 12:00:00',
216-
tzinfo=pytz.FixedOffset(60))] * 2
217-
expected = pd.DatetimeIndex(expected_dates)
218-
tm.assert_index_equal(result, expected)
219-
220-
result = pd.to_datetime(dates, format=fmt, box=False)
221-
expected = np.array(expected_dates, dtype=object)
222-
tm.assert_numpy_array_equal(result, expected)
189+
@pytest.mark.parametrize("box,const,assert_equal", [
190+
[True, pd.Index, 'assert_index_equal'],
191+
[False, np.array, 'assert_numpy_array_equal']])
192+
@pytest.mark.parametrize("fmt,dates,expected_dates", [
193+
['%Y-%m-%d %H:%M:%S %Z',
194+
['2010-01-01 12:00:00 UTC'] * 2,
195+
[pd.Timestamp('2010-01-01 12:00:00', tz='UTC')] * 2],
196+
['%Y-%m-%d %H:%M:%S %Z',
197+
['2010-01-01 12:00:00 UTC', '2010-01-01 12:00:00 GMT'],
198+
[pd.Timestamp('2010-01-01 12:00:00', tz='UTC'),
199+
pd.Timestamp('2010-01-01 12:00:00', tz='GMT')]],
200+
['%Y-%m-%d %H:%M:%S %z',
201+
['2010-01-01 12:00:00 +0100'] * 2,
202+
[pd.Timestamp('2010-01-01 12:00:00',
203+
tzinfo=pytz.FixedOffset(60))] * 2],
204+
['%Y-%m-%d %H:%M:%S %z',
205+
['2010-01-01 12:00:00 +0100', '2010-01-01 12:00:00 -0100'],
206+
[pd.Timestamp('2010-01-01 12:00:00',
207+
tzinfo=pytz.FixedOffset(60)),
208+
pd.Timestamp('2010-01-01 12:00:00',
209+
tzinfo=pytz.FixedOffset(-60))]]])
210+
def test_to_datetime_parse_tzname_or_tzoffset(self, box, const,
211+
assert_equal, fmt,
212+
dates, expected_dates):
213+
# %z or %Z parsing
214+
result = pd.to_datetime(dates, format=fmt, box=box)
215+
expected = const(expected_dates)
216+
getattr(tm, assert_equal)(result, expected)
223217

224-
dates = ['2010-01-01 12:00:00 +0100', '2010-01-01 12:00:00 -0100']
225-
result = pd.to_datetime(dates, format=fmt)
226-
expected_dates = [pd.Timestamp('2010-01-01 12:00:00',
227-
tzinfo=pytz.FixedOffset(60)),
228-
pd.Timestamp('2010-01-01 12:00:00',
229-
tzinfo=pytz.FixedOffset(-60))]
230-
expected = np.array(expected_dates, dtype=object)
231-
tm.assert_numpy_array_equal(result, expected)
218+
with pytest.raises(ValueError):
219+
pd.to_datetime(dates, format=fmt, box=box, utc=True)
232220

221+
@pytest.mark.skipif(not PY3,
222+
reason="datetime.timezone not supported in PY2")
223+
@pytest.mark.parametrize("box,const,assert_equal", [
224+
[True, pd.Index, 'assert_index_equal'],
225+
[False, np.array, 'assert_numpy_array_equal']])
226+
@pytest.mark.parametrize("dates,expected_dates", [
227+
[['2010-01-01 12:00:00 UTC +0100'] * 2,
228+
[pd.Timestamp('2010-01-01 13:00:00',
229+
tzinfo=timezone(timedelta(minutes=60), 'UTC'))] * 2],
230+
[['2010-01-01 12:00:00 UTC +0100', '2010-01-01 12:00:00 GMT -0200'],
231+
[pd.Timestamp('2010-01-01 13:00:00',
232+
tzinfo=timezone(timedelta(minutes=60), 'UTC')),
233+
pd.Timestamp('2010-01-01 10:00:00',
234+
tzinfo=timezone(timedelta(minutes=-120), 'GMT'))]]])
235+
def test_to_datetime_parse_tzname_and_tzoffset(self, box, const,
236+
assert_equal, dates,
237+
expected_dates):
233238
# %z and %Z parsing
234-
dates = ['2010-01-01 12:00:00 UTC +0100'] * 2
235239
fmt = '%Y-%m-%d %H:%M:%S %Z %z'
236-
result = pd.to_datetime(dates, format=fmt)
237-
tzinfo = timezone(timedelta(minutes=60), 'UTC')
238-
expected_dates = [pd.Timestamp('2010-01-01 13:00:00', tzinfo=tzinfo)]
239-
expected = np.array(expected_dates * 2, dtype=object)
240-
tm.assert_numpy_array_equal(result, expected)
240+
result = pd.to_datetime(dates, format=fmt, box=box)
241+
expected = const(expected_dates)
242+
getattr(tm, assert_equal)(result, expected)
241243

242244
with pytest.raises(ValueError):
243-
pd.to_datetime(dates, format=fmt, utc=True)
245+
pd.to_datetime(dates, format=fmt, box=box, utc=True)
244246

245-
@pytest.mark.parametrize('cache', ['+0', '-1foo', 'UTCbar', ':10'])
247+
@pytest.mark.parametrize('offset', ['+0', '-1foo', 'UTCbar', ':10'])
246248
def test_to_datetime_parse_timezone_malformed(self, offset):
247249
fmt = '%Y-%m-%d %H:%M:%S %z'
248250
date = '2010-01-01 12:00:00 ' + offset

0 commit comments

Comments
 (0)