Skip to content

Commit 7990c5b

Browse files
committed
BUG: to_datetime not localizing Series when utc=True (pandas-dev#6415)
Modify test case. Comment about test edit, move conversion logic to convert_listlike. Add new section in whatsnew and update test. Alter SQL tests. Modify whatsnew and make new wrapper function to handle UTC conversion. Simplified whatsnew and reverted arg renaming.
1 parent 929c66f commit 7990c5b

File tree

5 files changed

+102
-17
lines changed

5 files changed

+102
-17
lines changed

doc/source/whatsnew/v0.21.0.txt

+33-1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,39 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical
203203

204204
The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement.
205205

206+
.. _whatsnew_0210.api.utc_localization_with_series:
207+
208+
UTC Localization with Series
209+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
210+
211+
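Previous Behavior: ``to_datetime`` returned a timezone-naive ``Series`` even when ``utc=True`` was passed (:issue:`6415`).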
212+
213+
.. ipython:: python
214+
215+
s = Series(['20130101 00:00:00'] * 10)
216+
217+
.. code-block:: python
218+
219+
In [12]: pd.to_datetime(s, utc=True)
220+
Out[12]:
221+
0 2013-01-01
222+
1 2013-01-01
223+
2 2013-01-01
224+
3 2013-01-01
225+
4 2013-01-01
226+
5 2013-01-01
227+
6 2013-01-01
228+
7 2013-01-01
229+
8 2013-01-01
230+
9 2013-01-01
231+
dtype: datetime64[ns]
232+
233+
New Behavior: the resulting ``Series`` is localized to UTC when ``utc=True`` is passed.
234+
235+
.. ipython:: python
236+
237+
pd.to_datetime(s, utc=True)
238+
206239
.. _whatsnew_0210.api:
207240

208241
Other API Changes
@@ -266,7 +299,6 @@ Conversion
266299
- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`)
267300
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
268301

269-
270302
Indexing
271303
^^^^^^^^
272304

pandas/core/tools/datetimes.py

+23-6
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,17 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
340340

341341
tz = 'utc' if utc else None
342342

343+
def _maybe_convert_to_utc(arg, utc):
344+
if utc:
345+
if isinstance(arg, ABCSeries):
346+
arg = arg.dt.tz_localize('UTC')
347+
elif isinstance(arg, DatetimeIndex):
348+
if arg.tz is None:
349+
arg = arg.tz_localize('UTC')
350+
else:
351+
arg = arg.tz_convert('UTC')
352+
return arg
353+
343354
def _convert_listlike(arg, box, format, name=None, tz=tz):
344355

345356
if isinstance(arg, (list, tuple)):
@@ -359,7 +370,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
359370
return DatetimeIndex(arg, tz=tz, name=name)
360371
except ValueError:
361372
pass
362-
373+
arg = _maybe_convert_to_utc(arg, utc)
363374
return arg
364375

365376
elif unit is not None:
@@ -378,12 +389,15 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
378389
elif getattr(arg, 'ndim', 1) > 1:
379390
raise TypeError('arg must be a string, datetime, list, tuple, '
380391
'1-d array, or Series')
381-
392+
# _ensure_object converts Series to numpy array, need to reconvert
393+
# upon return
394+
arg_is_series = isinstance(arg, ABCSeries)
382395
arg = _ensure_object(arg)
383396
require_iso8601 = False
384397

385398
if infer_datetime_format and format is None:
386-
format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
399+
format = _guess_datetime_format_for_array(arg,
400+
dayfirst=dayfirst)
387401

388402
if format is not None:
389403
# There is a special fast-path for iso8601 formatted
@@ -410,7 +424,8 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
410424
# fallback
411425
if result is None:
412426
try:
413-
result = tslib.array_strptime(arg, format, exact=exact,
427+
result = tslib.array_strptime(arg, format,
428+
exact=exact,
414429
errors=errors)
415430
except tslib.OutOfBoundsDatetime:
416431
if errors == 'raise':
@@ -434,9 +449,11 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
434449
yearfirst=yearfirst,
435450
require_iso8601=require_iso8601
436451
)
437-
438452
if is_datetime64_dtype(result) and box:
439453
result = DatetimeIndex(result, tz=tz, name=name)
454+
# GH 6415
455+
elif arg_is_series:
456+
result = _maybe_convert_to_utc(Series(result, name=name), utc)
440457
return result
441458

442459
except ValueError as e:
@@ -506,7 +523,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
506523
result = arg
507524
elif isinstance(arg, ABCSeries):
508525
from pandas import Series
509-
values = _convert_listlike(arg._values, False, format)
526+
values = _convert_listlike(arg, False, format, name=arg.name)
510527
result = Series(values, index=arg.index, name=arg.name)
511528
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
512529
result = _assemble_from_unit_mappings(arg, errors=errors)

pandas/tests/indexes/datetimes/test_tools.py

+32
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,38 @@ def test_to_datetime_utc_is_true(self):
270270
expected = pd.DatetimeIndex(data=date_range)
271271
tm.assert_index_equal(result, expected)
272272

273+
def test_to_datetime_utc_true_with_series(self):
274+
# GH 6415: UTC=True with Series
275+
data = ['20100102 121314', '20100102 121315']
276+
expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'),
277+
pd.Timestamp('2010-01-02 12:13:15', tz='utc')]
278+
result = pd.to_datetime(pd.Series(data),
279+
format='%Y%m%d %H%M%S',
280+
utc=True)
281+
expected = pd.Series(expected_data)
282+
tm.assert_series_equal(result, expected)
283+
result = pd.to_datetime(pd.Index(data),
284+
format='%Y%m%d %H%M%S',
285+
utc=True)
286+
expected = pd.DatetimeIndex(expected_data)
287+
tm.assert_index_equal(result, expected)
288+
289+
# GH 15760 UTC=True with Series
290+
ts = 1.5e18
291+
result = pd.to_datetime(pd.Series([ts]), utc=True)
292+
expected = pd.Series([pd.Timestamp(ts, tz='utc')])
293+
tm.assert_series_equal(result, expected)
294+
295+
test_dates = ['2013-01-01 00:00:00-01:00'] * 10
296+
expected_data = [pd.Timestamp('20130101 01:00:00', tz='utc')] * 10
297+
expected = pd.Series(expected_data)
298+
ser = Series(test_dates)
299+
result = pd.to_datetime(ser, utc=True)
300+
tm.assert_series_equal(result, expected)
301+
ser_naive = Series(test_dates, dtype='datetime64[ns]')
302+
result = pd.to_datetime(ser_naive, utc=True)
303+
tm.assert_series_equal(result, expected)
304+
273305
def test_to_datetime_tz_psycopg2(self):
274306

275307
# xref 8260

pandas/tests/io/test_sql.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -606,14 +606,15 @@ def test_date_parsing(self):
606606
# No Parsing
607607
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn)
608608
assert not issubclass(df.DateCol.dtype.type, np.datetime64)
609-
609+
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
610+
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
610611
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
611612
parse_dates=['DateCol'])
612-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
613+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
613614

614615
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
615616
parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
616-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
617+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
617618

618619
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
619620
parse_dates=['IntDateCol'])
@@ -631,8 +632,9 @@ def test_date_and_index(self):
631632
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
632633
index_col='DateCol',
633634
parse_dates=['DateCol', 'IntDateCol'])
634-
635-
assert issubclass(df.index.dtype.type, np.datetime64)
635+
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
636+
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
637+
assert issubclass(df.index.dtype.type, utc_dtype)
636638
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
637639

638640
def test_timedelta(self):
@@ -1319,18 +1321,19 @@ def check(col):
13191321
def test_date_parsing(self):
13201322
# No Parsing
13211323
df = sql.read_sql_table("types_test_data", self.conn)
1322-
1324+
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
1325+
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
13231326
df = sql.read_sql_table("types_test_data", self.conn,
13241327
parse_dates=['DateCol'])
1325-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
1328+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
13261329

13271330
df = sql.read_sql_table("types_test_data", self.conn,
13281331
parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
1329-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
1332+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
13301333

13311334
df = sql.read_sql_table("types_test_data", self.conn, parse_dates={
13321335
'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}})
1333-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
1336+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
13341337

13351338
df = sql.read_sql_table(
13361339
"types_test_data", self.conn, parse_dates=['IntDateCol'])

pandas/tests/test_multilevel.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2130,7 +2130,8 @@ def test_set_index_datetime(self):
21302130
'2011-07-19 08:00:00', '2011-07-19 09:00:00'],
21312131
'value': range(6)})
21322132
df.index = pd.to_datetime(df.pop('datetime'), utc=True)
2133-
df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific')
2133+
# Removed tz_localize('UTC') below after GH 6415 was fixed
2134+
df.index = df.index.tz_convert('US/Pacific')
21342135

21352136
expected = pd.DatetimeIndex(['2011-07-19 07:00:00',
21362137
'2011-07-19 08:00:00',

0 commit comments

Comments
 (0)