Skip to content

Commit e85263d

Browse files
committed
BUG: to_datetime not localizing Series when utc=True (#6415)
Modify test case. Comment about test edit, move conversion logic to convert_listlike. Add new section in whatsnew and update test. Alter SQL tests.
1 parent 929c66f commit e85263d

File tree

5 files changed

+95
-18
lines changed

5 files changed

+95
-18
lines changed

doc/source/whatsnew/v0.21.0.txt

+36-1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,42 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical
203203

204204
The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement.
205205

206+
.. _whatsnew_0210.api.utc_localization_with_series:
207+
208+
UTC Localization with Series
209+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
210+
211+
Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime`
212+
will correctly localize the ``Series``, returning a ``datetime64[ns, UTC]`` data type. (:issue:`6415`)
213+
214+
Old Behavior
215+
216+
.. ipython:: python
217+
218+
s = Series(['20130101 00:00:00'] * 10)
219+
220+
.. code-block:: python
221+
222+
In [12]: pd.to_datetime(s, utc=True)
223+
Out[12]:
224+
0 2013-01-01
225+
1 2013-01-01
226+
2 2013-01-01
227+
3 2013-01-01
228+
4 2013-01-01
229+
5 2013-01-01
230+
6 2013-01-01
231+
7 2013-01-01
232+
8 2013-01-01
233+
9 2013-01-01
234+
dtype: datetime64[ns]
235+
236+
New Behavior
237+
238+
.. ipython:: python
239+
240+
pd.to_datetime(s, utc=True)
241+
206242
.. _whatsnew_0210.api:
207243

208244
Other API Changes
@@ -266,7 +302,6 @@ Conversion
266302
- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`)
267303
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
268304

269-
270305
Indexing
271306
^^^^^^^^
272307

pandas/core/tools/datetimes.py

+18-11
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,9 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
359359
return DatetimeIndex(arg, tz=tz, name=name)
360360
except ValueError:
361361
pass
362-
362+
from pandas import Series
363+
if isinstance(arg, Series) and utc:
364+
arg = arg.dt.tz_localize('utc')
363365
return arg
364366

365367
elif unit is not None:
@@ -379,11 +381,12 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
379381
raise TypeError('arg must be a string, datetime, list, tuple, '
380382
'1-d array, or Series')
381383

382-
arg = _ensure_object(arg)
384+
obj_arg = _ensure_object(arg)
383385
require_iso8601 = False
384386

385387
if infer_datetime_format and format is None:
386-
format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
388+
format = _guess_datetime_format_for_array(obj_arg,
389+
dayfirst=dayfirst)
387390

388391
if format is not None:
389392
# There is a special fast-path for iso8601 formatted
@@ -402,46 +405,50 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
402405
# shortcut formatting here
403406
if format == '%Y%m%d':
404407
try:
405-
result = _attempt_YYYYMMDD(arg, errors=errors)
408+
result = _attempt_YYYYMMDD(obj_arg, errors=errors)
406409
except:
407410
raise ValueError("cannot convert the input to "
408411
"'%Y%m%d' date format")
409412

410413
# fallback
411414
if result is None:
412415
try:
413-
result = tslib.array_strptime(arg, format, exact=exact,
416+
result = tslib.array_strptime(obj_arg, format,
417+
exact=exact,
414418
errors=errors)
415419
except tslib.OutOfBoundsDatetime:
416420
if errors == 'raise':
417421
raise
418-
result = arg
422+
result = obj_arg
419423
except ValueError:
420424
# if format was inferred, try falling back
421425
# to array_to_datetime - terminate here
422426
# for specified formats
423427
if not infer_datetime_format:
424428
if errors == 'raise':
425429
raise
426-
result = arg
430+
result = obj_arg
427431

428432
if result is None and (format is None or infer_datetime_format):
429433
result = tslib.array_to_datetime(
430-
arg,
434+
obj_arg,
431435
errors=errors,
432436
utc=utc,
433437
dayfirst=dayfirst,
434438
yearfirst=yearfirst,
435439
require_iso8601=require_iso8601
436440
)
437-
441+
from pandas import Series
438442
if is_datetime64_dtype(result) and box:
439443
result = DatetimeIndex(result, tz=tz, name=name)
444+
# GH 6415
445+
elif isinstance(arg, Series) and utc:
446+
result = Series(result, name=name).dt.tz_localize('utc')
440447
return result
441448

442449
except ValueError as e:
443450
try:
444-
values, tz = tslib.datetime_to_datetime64(arg)
451+
values, tz = tslib.datetime_to_datetime64(obj_arg)
445452
return DatetimeIndex._simple_new(values, name=name, tz=tz)
446453
except (ValueError, TypeError):
447454
raise e
@@ -506,7 +513,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
506513
result = arg
507514
elif isinstance(arg, ABCSeries):
508515
from pandas import Series
509-
values = _convert_listlike(arg._values, False, format)
516+
values = _convert_listlike(arg, False, format, name=arg.name)
510517
result = Series(values, index=arg.index, name=arg.name)
511518
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
512519
result = _assemble_from_unit_mappings(arg, errors=errors)

pandas/tests/indexes/datetimes/test_tools.py

+32
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,38 @@ def test_to_datetime_utc_is_true(self):
270270
expected = pd.DatetimeIndex(data=date_range)
271271
tm.assert_index_equal(result, expected)
272272

273+
def test_to_datetime_utc_true_with_series(self):
274+
# GH 6415: UTC=True with Series
275+
data = ['20100102 121314', '20100102 121315']
276+
expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'),
277+
pd.Timestamp('2010-01-02 12:13:15', tz='utc')]
278+
result = pd.to_datetime(pd.Series(data),
279+
format='%Y%m%d %H%M%S',
280+
utc=True)
281+
expected = pd.Series(expected_data)
282+
tm.assert_series_equal(result, expected)
283+
result = pd.to_datetime(pd.Index(data),
284+
format='%Y%m%d %H%M%S',
285+
utc=True)
286+
expected = pd.DatetimeIndex(expected_data)
287+
tm.assert_index_equal(result, expected)
288+
289+
# GH 15760 UTC=True with Series
290+
ts = 1.5e18
291+
result = pd.to_datetime(pd.Series([ts]), utc=True)
292+
expected = pd.Series([pd.Timestamp(ts, tz='utc')])
293+
tm.assert_series_equal(result, expected)
294+
295+
test_dates = ['2013-01-01 00:00:00-01:00'] * 10
296+
expected_data = [pd.Timestamp('20130101 01:00:00', tz='utc')] * 10
297+
expected = pd.Series(expected_data)
298+
ser = Series(test_dates)
299+
result = pd.to_datetime(ser, utc=True)
300+
tm.assert_series_equal(result, expected)
301+
ser_naive = Series(test_dates, dtype='datetime64[ns]')
302+
result = pd.to_datetime(ser_naive, utc=True)
303+
tm.assert_series_equal(result, expected)
304+
273305
def test_to_datetime_tz_psycopg2(self):
274306

275307
# xref 8260

pandas/tests/io/test_sql.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -606,14 +606,15 @@ def test_date_parsing(self):
606606
# No Parsing
607607
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn)
608608
assert not issubclass(df.DateCol.dtype.type, np.datetime64)
609-
609+
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
610+
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
610611
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
611612
parse_dates=['DateCol'])
612-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
613+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
613614

614615
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
615616
parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
616-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
617+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
617618

618619
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
619620
parse_dates=['IntDateCol'])
@@ -631,8 +632,9 @@ def test_date_and_index(self):
631632
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
632633
index_col='DateCol',
633634
parse_dates=['DateCol', 'IntDateCol'])
634-
635-
assert issubclass(df.index.dtype.type, np.datetime64)
635+
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
636+
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
637+
assert issubclass(df.index.dtype.type, utc_dtype)
636638
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
637639

638640
def test_timedelta(self):

pandas/tests/test_multilevel.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2130,7 +2130,8 @@ def test_set_index_datetime(self):
21302130
'2011-07-19 08:00:00', '2011-07-19 09:00:00'],
21312131
'value': range(6)})
21322132
df.index = pd.to_datetime(df.pop('datetime'), utc=True)
2133-
df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific')
2133+
# Removed the tz_localize('UTC') call below after GH 6415 was fixed
2134+
df.index = df.index.tz_convert('US/Pacific')
21342135

21352136
expected = pd.DatetimeIndex(['2011-07-19 07:00:00',
21362137
'2011-07-19 08:00:00',

0 commit comments

Comments
 (0)