Skip to content

Commit b846c57

Browse files
committed
Fix merge
1 parent 062f6f1 commit b846c57

File tree

5 files changed

+105
-16
lines changed

5 files changed

+105
-16
lines changed

doc/source/whatsnew/v0.21.0.txt

+36
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,39 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the
298298

299299
pd.MultiIndex.from_tuples([('a',), ('b',)])
300300

301+
.. _whatsnew_0210.api.utc_localization_with_series:
302+
303+
UTC Localization with Series
304+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
305+
306+
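Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` correctly localizes a ``Series`` to a ``datetime64[ns, UTC]`` dtype (:issue:`6415`).

Previous Behavior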
307+
308+
.. ipython:: python
309+
310+
s = Series(['20130101 00:00:00'] * 10)
311+
312+
.. code-block:: python
313+
314+
In [12]: pd.to_datetime(s, utc=True)
315+
Out[12]:
316+
0 2013-01-01
317+
1 2013-01-01
318+
2 2013-01-01
319+
3 2013-01-01
320+
4 2013-01-01
321+
5 2013-01-01
322+
6 2013-01-01
323+
7 2013-01-01
324+
8 2013-01-01
325+
9 2013-01-01
326+
dtype: datetime64[ns]
327+
328+
New Behavior
329+
330+
.. ipython:: python
331+
332+
pd.to_datetime(s, utc=True)
333+
301334
.. _whatsnew_0210.api:
302335

303336
Other API Changes
@@ -363,10 +396,13 @@ Conversion
363396

364397
- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`)
365398
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
366400
- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`)
367401
- Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`)
368402
- Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`)
369403
- Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`)
370406

371407
Indexing
372408
^^^^^^^^

pandas/core/tools/datetimes.py

+23-6
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,17 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
345345

346346
tz = 'utc' if utc else None
347347

348+
def _maybe_convert_to_utc(arg, utc):
349+
if utc:
350+
if isinstance(arg, ABCSeries):
351+
arg = arg.dt.tz_localize('UTC')
352+
elif isinstance(arg, DatetimeIndex):
353+
if arg.tz is None:
354+
arg = arg.tz_localize('UTC')
355+
else:
356+
arg = arg.tz_convert('UTC')
357+
return arg
358+
348359
def _convert_listlike(arg, box, format, name=None, tz=tz):
349360

350361
if isinstance(arg, (list, tuple)):
@@ -364,7 +375,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
364375
return DatetimeIndex(arg, tz=tz, name=name)
365376
except ValueError:
366377
pass
367-
378+
arg = _maybe_convert_to_utc(arg, utc)
368379
return arg
369380

370381
elif unit is not None:
@@ -383,12 +394,15 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
383394
elif getattr(arg, 'ndim', 1) > 1:
384395
raise TypeError('arg must be a string, datetime, list, tuple, '
385396
'1-d array, or Series')
386-
397+
# _ensure_object converts Series to numpy array, need to reconvert
398+
# upon return
399+
arg_is_series = isinstance(arg, ABCSeries)
387400
arg = _ensure_object(arg)
388401
require_iso8601 = False
389402

390403
if infer_datetime_format and format is None:
391-
format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
404+
format = _guess_datetime_format_for_array(arg,
405+
dayfirst=dayfirst)
392406

393407
if format is not None:
394408
# There is a special fast-path for iso8601 formatted
@@ -415,7 +429,8 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
415429
# fallback
416430
if result is None:
417431
try:
418-
result = tslib.array_strptime(arg, format, exact=exact,
432+
result = tslib.array_strptime(arg, format,
433+
exact=exact,
419434
errors=errors)
420435
except tslib.OutOfBoundsDatetime:
421436
if errors == 'raise':
@@ -439,9 +454,11 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
439454
yearfirst=yearfirst,
440455
require_iso8601=require_iso8601
441456
)
442-
443457
if is_datetime64_dtype(result) and box:
444458
result = DatetimeIndex(result, tz=tz, name=name)
459+
# GH 6415
460+
elif arg_is_series:
461+
result = _maybe_convert_to_utc(Series(result, name=name), utc)
445462
return result
446463

447464
except ValueError as e:
@@ -516,7 +533,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
516533
result = arg
517534
elif isinstance(arg, ABCSeries):
518535
from pandas import Series
519-
values = _convert_listlike(arg._values, False, format)
536+
values = _convert_listlike(arg, False, format, name=arg.name)
520537
result = Series(values, index=arg.index, name=arg.name)
521538
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
522539
result = _assemble_from_unit_mappings(arg, errors=errors)

pandas/tests/indexes/datetimes/test_tools.py

+32
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,38 @@ def test_to_datetime_utc_is_true(self):
270270
expected = pd.DatetimeIndex(data=date_range)
271271
tm.assert_index_equal(result, expected)
272272

273+
def test_to_datetime_utc_true_with_series(self):
274+
# GH 6415: UTC=True with Series
275+
data = ['20100102 121314', '20100102 121315']
276+
expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'),
277+
pd.Timestamp('2010-01-02 12:13:15', tz='utc')]
278+
result = pd.to_datetime(pd.Series(data),
279+
format='%Y%m%d %H%M%S',
280+
utc=True)
281+
expected = pd.Series(expected_data)
282+
tm.assert_series_equal(result, expected)
283+
result = pd.to_datetime(pd.Index(data),
284+
format='%Y%m%d %H%M%S',
285+
utc=True)
286+
expected = pd.DatetimeIndex(expected_data)
287+
tm.assert_index_equal(result, expected)
288+
289+
# GH 15760 UTC=True with Series
290+
ts = 1.5e18
291+
result = pd.to_datetime(pd.Series([ts]), utc=True)
292+
expected = pd.Series([pd.Timestamp(ts, tz='utc')])
293+
tm.assert_series_equal(result, expected)
294+
295+
test_dates = ['2013-01-01 00:00:00-01:00'] * 10
296+
expected_data = [pd.Timestamp('20130101 01:00:00', tz='utc')] * 10
297+
expected = pd.Series(expected_data)
298+
ser = Series(test_dates)
299+
result = pd.to_datetime(ser, utc=True)
300+
tm.assert_series_equal(result, expected)
301+
ser_naive = Series(test_dates, dtype='datetime64[ns]')
302+
result = pd.to_datetime(ser_naive, utc=True)
303+
tm.assert_series_equal(result, expected)
304+
273305
def test_to_datetime_tz_psycopg2(self):
274306

275307
# xref 8260

pandas/tests/io/test_sql.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -606,14 +606,15 @@ def test_date_parsing(self):
606606
# No Parsing
607607
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn)
608608
assert not issubclass(df.DateCol.dtype.type, np.datetime64)
609-
609+
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
610+
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
610611
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
611612
parse_dates=['DateCol'])
612-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
613+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
613614

614615
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
615616
parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
616-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
617+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
617618

618619
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
619620
parse_dates=['IntDateCol'])
@@ -631,8 +632,9 @@ def test_date_and_index(self):
631632
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
632633
index_col='DateCol',
633634
parse_dates=['DateCol', 'IntDateCol'])
634-
635-
assert issubclass(df.index.dtype.type, np.datetime64)
635+
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
636+
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
637+
assert issubclass(df.index.dtype.type, utc_dtype)
636638
assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
637639

638640
def test_timedelta(self):
@@ -1319,18 +1321,19 @@ def check(col):
13191321
def test_date_parsing(self):
13201322
# No Parsing
13211323
df = sql.read_sql_table("types_test_data", self.conn)
1322-
1324+
# Now that GH 6415 is fixed, dates are automatically parsed to UTC
1325+
utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType
13231326
df = sql.read_sql_table("types_test_data", self.conn,
13241327
parse_dates=['DateCol'])
1325-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
1328+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
13261329

13271330
df = sql.read_sql_table("types_test_data", self.conn,
13281331
parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
1329-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
1332+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
13301333

13311334
df = sql.read_sql_table("types_test_data", self.conn, parse_dates={
13321335
'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}})
1333-
assert issubclass(df.DateCol.dtype.type, np.datetime64)
1336+
assert issubclass(df.DateCol.dtype.type, utc_dtype)
13341337

13351338
df = sql.read_sql_table(
13361339
"types_test_data", self.conn, parse_dates=['IntDateCol'])

pandas/tests/test_multilevel.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2137,7 +2137,8 @@ def test_set_index_datetime(self):
21372137
'2011-07-19 08:00:00', '2011-07-19 09:00:00'],
21382138
'value': range(6)})
21392139
df.index = pd.to_datetime(df.pop('datetime'), utc=True)
2140-
df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific')
2140+
# Removed tz_localize('UTC') below after GH 6415 was fixed
2141+
df.index = df.index.tz_convert('US/Pacific')
21412142

21422143
expected = pd.DatetimeIndex(['2011-07-19 07:00:00',
21432144
'2011-07-19 08:00:00',

0 commit comments

Comments
 (0)