Skip to content

Commit dad39d5

Browse files
mroeschke authored and jorisvandenbossche committed
API: Localize Series when calling to_datetime with utc=True (#6415) (#17109)
1 parent 062f6f1 commit dad39d5

File tree

6 files changed

+101
-25
lines changed

6 files changed

+101
-25
lines changed

doc/source/whatsnew/v0.21.0.txt

+30
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,36 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the
298298

299299
pd.MultiIndex.from_tuples([('a',), ('b',)])
300300

301+
.. _whatsnew_0210.api.utc_localization_with_series:
302+
303+
UTC Localization with Series
304+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
305+
306+
Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` correctly localizes ``Series`` data and returns a ``Series`` with a ``datetime64[ns, UTC]`` dtype, consistent with how list-like and ``Index`` data are handled (:issue:`6415`).
307+
308+
Previous Behavior
309+
310+
.. ipython:: python
311+
312+
s = Series(['20130101 00:00:00'] * 3)
313+
314+
.. code-block:: ipython
315+
316+
In [12]: pd.to_datetime(s, utc=True)
317+
Out[12]:
318+
0 2013-01-01
319+
1 2013-01-01
320+
2 2013-01-01
321+
dtype: datetime64[ns]
322+
323+
New Behavior
324+
325+
.. ipython:: python
326+
327+
pd.to_datetime(s, utc=True)
328+
329+
Additionally, datetime columns in DataFrames parsed by :func:`read_sql_table` and :func:`read_sql_query` will be localized to UTC only if the original SQL columns were timezone-aware datetime columns.
330+
301331
.. _whatsnew_0210.api:
302332

303333
Other API Changes

pandas/core/tools/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
516516
result = arg
517517
elif isinstance(arg, ABCSeries):
518518
from pandas import Series
519-
values = _convert_listlike(arg._values, False, format)
519+
values = _convert_listlike(arg._values, True, format)
520520
result = Series(values, index=arg.index, name=arg.name)
521521
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
522522
result = _assemble_from_unit_mappings(arg, errors=errors)

pandas/io/sql.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -99,24 +99,24 @@ def _convert_params(sql, params):
9999
return args
100100

101101

102-
def _handle_date_column(col, format=None):
102+
def _handle_date_column(col, utc=None, format=None):
103103
if isinstance(format, dict):
104104
return to_datetime(col, errors='ignore', **format)
105105
else:
106106
if format in ['D', 's', 'ms', 'us', 'ns']:
107-
return to_datetime(col, errors='coerce', unit=format, utc=True)
107+
return to_datetime(col, errors='coerce', unit=format, utc=utc)
108108
elif (issubclass(col.dtype.type, np.floating) or
109109
issubclass(col.dtype.type, np.integer)):
110110
# parse dates as timestamp
111111
format = 's' if format is None else format
112-
return to_datetime(col, errors='coerce', unit=format, utc=True)
112+
return to_datetime(col, errors='coerce', unit=format, utc=utc)
113113
elif is_datetime64tz_dtype(col):
114114
# coerce to UTC timezone
115115
# GH11216
116116
return (to_datetime(col, errors='coerce')
117117
.astype('datetime64[ns, UTC]'))
118118
else:
119-
return to_datetime(col, errors='coerce', format=format, utc=True)
119+
return to_datetime(col, errors='coerce', format=format, utc=utc)
120120

121121

122122
def _parse_date_columns(data_frame, parse_dates):
@@ -821,8 +821,9 @@ def _harmonize_columns(self, parse_dates=None):
821821

822822
if (col_type is datetime or col_type is date or
823823
col_type is DatetimeTZDtype):
824-
self.frame[col_name] = _handle_date_column(df_col)
825-
824+
# Convert tz-aware Datetime SQL columns to UTC
825+
utc = col_type is DatetimeTZDtype
826+
self.frame[col_name] = _handle_date_column(df_col, utc=utc)
826827
elif col_type is float:
827828
# floats support NA, can always convert!
828829
self.frame[col_name] = df_col.astype(col_type, copy=False)

pandas/tests/indexes/datetimes/test_tools.py

+47-9
Original file line numberDiff line numberDiff line change
@@ -260,15 +260,53 @@ def test_to_datetime_tz_pytz(self):
260260
dtype='datetime64[ns, UTC]', freq=None)
261261
tm.assert_index_equal(result, expected)
262262

263-
def test_to_datetime_utc_is_true(self):
264-
# See gh-11934
265-
start = pd.Timestamp('2014-01-01', tz='utc')
266-
end = pd.Timestamp('2014-01-03', tz='utc')
267-
date_range = pd.bdate_range(start, end)
268-
269-
result = pd.to_datetime(date_range, utc=True)
270-
expected = pd.DatetimeIndex(data=date_range)
271-
tm.assert_index_equal(result, expected)
263+
@pytest.mark.parametrize("init_constructor, end_constructor, test_method",
264+
[(Index, DatetimeIndex, tm.assert_index_equal),
265+
(list, DatetimeIndex, tm.assert_index_equal),
266+
(np.array, DatetimeIndex, tm.assert_index_equal),
267+
(Series, Series, tm.assert_series_equal)])
268+
def test_to_datetime_utc_true(self,
269+
init_constructor,
270+
end_constructor,
271+
test_method):
272+
# See gh-11934 & gh-6415
273+
data = ['20100102 121314', '20100102 121315']
274+
expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'),
275+
pd.Timestamp('2010-01-02 12:13:15', tz='utc')]
276+
277+
result = pd.to_datetime(init_constructor(data),
278+
format='%Y%m%d %H%M%S',
279+
utc=True)
280+
expected = end_constructor(expected_data)
281+
test_method(result, expected)
282+
283+
# Test scalar case as well
284+
for scalar, expected in zip(data, expected_data):
285+
result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True)
286+
assert result == expected
287+
288+
def test_to_datetime_utc_true_with_series_single_value(self):
289+
# GH 15760 UTC=True with Series
290+
ts = 1.5e18
291+
result = pd.to_datetime(pd.Series([ts]), utc=True)
292+
expected = pd.Series([pd.Timestamp(ts, tz='utc')])
293+
tm.assert_series_equal(result, expected)
294+
295+
def test_to_datetime_utc_true_with_series_tzaware_string(self):
296+
ts = '2013-01-01 00:00:00-01:00'
297+
expected_ts = '2013-01-01 01:00:00'
298+
data = pd.Series([ts] * 3)
299+
result = pd.to_datetime(data, utc=True)
300+
expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3)
301+
tm.assert_series_equal(result, expected)
302+
303+
@pytest.mark.parametrize('date, dtype',
304+
[('2013-01-01 01:00:00', 'datetime64[ns]'),
305+
('2013-01-01 01:00:00', 'datetime64[ns, UTC]')])
306+
def test_to_datetime_utc_true_with_series_datetime_ns(self, date, dtype):
307+
expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')])
308+
result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True)
309+
tm.assert_series_equal(result, expected)
272310

273311
def test_to_datetime_tz_psycopg2(self):
274312

pandas/tests/io/test_sql.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,7 @@ def test_execute_sql(self):
602602
tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'])
603603

604604
def test_date_parsing(self):
605-
# Test date parsing in read_sq
605+
# Test date parsing in read_sql
606606
# No Parsing
607607
df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn)
608608
assert not issubclass(df.DateCol.dtype.type, np.datetime64)
@@ -1271,11 +1271,13 @@ def check(col):
12711271

12721272
# "2000-01-01 00:00:00-08:00" should convert to
12731273
# "2000-01-01 08:00:00"
1274-
assert col[0] == Timestamp('2000-01-01 08:00:00', tz='UTC')
1275-
12761274
# "2000-06-01 00:00:00-07:00" should convert to
12771275
# "2000-06-01 07:00:00"
1278-
assert col[1] == Timestamp('2000-06-01 07:00:00', tz='UTC')
1276+
# GH 6415
1277+
expected_data = [Timestamp('2000-01-01 08:00:00', tz='UTC'),
1278+
Timestamp('2000-06-01 07:00:00', tz='UTC')]
1279+
expected = Series(expected_data, name=col.name)
1280+
tm.assert_series_equal(col, expected)
12791281

12801282
else:
12811283
raise AssertionError("DateCol loaded with incorrect type "
@@ -1298,6 +1300,9 @@ def check(col):
12981300
self.conn, parse_dates=['DateColWithTz'])
12991301
if not hasattr(df, 'DateColWithTz'):
13001302
pytest.skip("no column with datetime with time zone")
1303+
col = df.DateColWithTz
1304+
assert is_datetime64tz_dtype(col.dtype)
1305+
assert str(col.dt.tz) == 'UTC'
13011306
check(df.DateColWithTz)
13021307

13031308
df = pd.concat(list(pd.read_sql_query("select * from types_test_data",
@@ -1307,9 +1312,9 @@ def check(col):
13071312
assert is_datetime64tz_dtype(col.dtype)
13081313
assert str(col.dt.tz) == 'UTC'
13091314
expected = sql.read_sql_table("types_test_data", self.conn)
1310-
tm.assert_series_equal(df.DateColWithTz,
1311-
expected.DateColWithTz
1312-
.astype('datetime64[ns, UTC]'))
1315+
col = expected.DateColWithTz
1316+
assert is_datetime64tz_dtype(col.dtype)
1317+
tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz)
13131318

13141319
# xref #7139
13151320
# this might or might not be converted depending on the postgres driver
@@ -1388,8 +1393,10 @@ def test_datetime_date(self):
13881393
df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
13891394
df.to_sql('test_date', self.conn, index=False)
13901395
res = read_sql_table('test_date', self.conn)
1396+
result = res['a']
1397+
expected = to_datetime(df['a'])
13911398
# comes back as datetime64
1392-
tm.assert_series_equal(res['a'], to_datetime(df['a']))
1399+
tm.assert_series_equal(result, expected)
13931400

13941401
def test_datetime_time(self):
13951402
# test support for datetime.time

pandas/tests/test_multilevel.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2137,7 +2137,7 @@ def test_set_index_datetime(self):
21372137
'2011-07-19 08:00:00', '2011-07-19 09:00:00'],
21382138
'value': range(6)})
21392139
df.index = pd.to_datetime(df.pop('datetime'), utc=True)
2140-
df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific')
2140+
df.index = df.index.tz_convert('US/Pacific')
21412141

21422142
expected = pd.DatetimeIndex(['2011-07-19 07:00:00',
21432143
'2011-07-19 08:00:00',

0 commit comments

Comments
 (0)