diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 273cbd8357f85..e0963a1908bbc 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -298,6 +298,36 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the pd.MultiIndex.from_tuples([('a',), ('b',)]) +.. _whatsnew_0210.api.utc_localization_with_series: + +UTC Localization with Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize ``Series`` data, returning a ``Series`` with a ``datetime64[ns, UTC]`` dtype, consistent with how list-like and ``Index`` data are handled (:issue:`6415`). + + Previous Behavior + + .. ipython:: python + + s = Series(['20130101 00:00:00'] * 3) + + .. code-block:: ipython + + In [12]: pd.to_datetime(s, utc=True) + Out[12]: + 0 2013-01-01 + 1 2013-01-01 + 2 2013-01-01 + dtype: datetime64[ns] + + New Behavior + + .. ipython:: python + + pd.to_datetime(s, utc=True) + +Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will be localized to UTC only if the original SQL columns were timezone-aware datetime columns. + .. 
_whatsnew_0210.api: Other API Changes diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c0f234a36803d..9ff0275a7c370 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -516,7 +516,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): result = arg elif isinstance(arg, ABCSeries): from pandas import Series - values = _convert_listlike(arg._values, False, format) + values = _convert_listlike(arg._values, True, format) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9aa47e5c69850..9c6d01d236c57 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -99,24 +99,24 @@ def _convert_params(sql, params): return args -def _handle_date_column(col, format=None): +def _handle_date_column(col, utc=None, format=None): if isinstance(format, dict): return to_datetime(col, errors='ignore', **format) else: if format in ['D', 's', 'ms', 'us', 'ns']: - return to_datetime(col, errors='coerce', unit=format, utc=True) + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif (issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer)): # parse dates as timestamp format = 's' if format is None else format - return to_datetime(col, errors='coerce', unit=format, utc=True) + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif is_datetime64tz_dtype(col): # coerce to UTC timezone # GH11216 return (to_datetime(col, errors='coerce') .astype('datetime64[ns, UTC]')) else: - return to_datetime(col, errors='coerce', format=format, utc=True) + return to_datetime(col, errors='coerce', format=format, utc=utc) def _parse_date_columns(data_frame, parse_dates): @@ -821,8 +821,9 @@ def _harmonize_columns(self, parse_dates=None): if (col_type is datetime or col_type is date or col_type is DatetimeTZDtype): - 
self.frame[col_name] = _handle_date_column(df_col) - + # Convert tz-aware Datetime SQL columns to UTC + utc = col_type is DatetimeTZDtype + self.frame[col_name] = _handle_date_column(df_col, utc=utc) elif col_type is float: # floats support NA, can always convert! self.frame[col_name] = df_col.astype(col_type, copy=False) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 50669ee357bbd..089d74a1d69b8 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -260,15 +260,53 @@ def test_to_datetime_tz_pytz(self): dtype='datetime64[ns, UTC]', freq=None) tm.assert_index_equal(result, expected) - def test_to_datetime_utc_is_true(self): - # See gh-11934 - start = pd.Timestamp('2014-01-01', tz='utc') - end = pd.Timestamp('2014-01-03', tz='utc') - date_range = pd.bdate_range(start, end) - - result = pd.to_datetime(date_range, utc=True) - expected = pd.DatetimeIndex(data=date_range) - tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("init_constructor, end_constructor, test_method", + [(Index, DatetimeIndex, tm.assert_index_equal), + (list, DatetimeIndex, tm.assert_index_equal), + (np.array, DatetimeIndex, tm.assert_index_equal), + (Series, Series, tm.assert_series_equal)]) + def test_to_datetime_utc_true(self, + init_constructor, + end_constructor, + test_method): + # See gh-11934 & gh-6415 + data = ['20100102 121314', '20100102 121315'] + expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'), + pd.Timestamp('2010-01-02 12:13:15', tz='utc')] + + result = pd.to_datetime(init_constructor(data), + format='%Y%m%d %H%M%S', + utc=True) + expected = end_constructor(expected_data) + test_method(result, expected) + + # Test scalar case as well + for scalar, expected in zip(data, expected_data): + result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True) + assert result == expected + + def 
test_to_datetime_utc_true_with_series_single_value(self): + # GH 15760 UTC=True with Series + ts = 1.5e18 + result = pd.to_datetime(pd.Series([ts]), utc=True) + expected = pd.Series([pd.Timestamp(ts, tz='utc')]) + tm.assert_series_equal(result, expected) + + def test_to_datetime_utc_true_with_series_tzaware_string(self): + ts = '2013-01-01 00:00:00-01:00' + expected_ts = '2013-01-01 01:00:00' + data = pd.Series([ts] * 3) + result = pd.to_datetime(data, utc=True) + expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('date, dtype', + [('2013-01-01 01:00:00', 'datetime64[ns]'), + ('2013-01-01 01:00:00', 'datetime64[ns, UTC]')]) + def test_to_datetime_utc_true_with_series_datetime_ns(self, date, dtype): + expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')]) + result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True) + tm.assert_series_equal(result, expected) def test_to_datetime_tz_psycopg2(self): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a7c42391effe6..93eb0ff0ac1f2 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -602,7 +602,7 @@ def test_execute_sql(self): tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def test_date_parsing(self): - # Test date parsing in read_sq + # Test date parsing in read_sql # No Parsing df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn) assert not issubclass(df.DateCol.dtype.type, np.datetime64) @@ -1271,11 +1271,13 @@ def check(col): # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" - assert col[0] == Timestamp('2000-01-01 08:00:00', tz='UTC') - # "2000-06-01 00:00:00-07:00" should convert to # "2000-06-01 07:00:00" - assert col[1] == Timestamp('2000-06-01 07:00:00', tz='UTC') + # GH 6415 + expected_data = [Timestamp('2000-01-01 08:00:00', tz='UTC'), + Timestamp('2000-06-01 07:00:00', tz='UTC')] + expected = 
Series(expected_data, name=col.name) + tm.assert_series_equal(col, expected) else: raise AssertionError("DateCol loaded with incorrect type " @@ -1298,6 +1300,9 @@ def check(col): self.conn, parse_dates=['DateColWithTz']) if not hasattr(df, 'DateColWithTz'): pytest.skip("no column with datetime with time zone") + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == 'UTC' check(df.DateColWithTz) df = pd.concat(list(pd.read_sql_query("select * from types_test_data", @@ -1307,9 +1312,9 @@ def check(col): assert is_datetime64tz_dtype(col.dtype) assert str(col.dt.tz) == 'UTC' expected = sql.read_sql_table("types_test_data", self.conn) - tm.assert_series_equal(df.DateColWithTz, - expected.DateColWithTz - .astype('datetime64[ns, UTC]')) + col = expected.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz) # xref #7139 # this might or might not be converted depending on the postgres driver @@ -1388,8 +1393,10 @@ def test_datetime_date(self): df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) df.to_sql('test_date', self.conn, index=False) res = read_sql_table('test_date', self.conn) + result = res['a'] + expected = to_datetime(df['a']) # comes back as datetime64 - tm.assert_series_equal(res['a'], to_datetime(df['a'])) + tm.assert_series_equal(result, expected) def test_datetime_time(self): # test support for datetime.time diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a765e2c4ca1bf..6976fe162c5d5 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2137,7 +2137,7 @@ def test_set_index_datetime(self): '2011-07-19 08:00:00', '2011-07-19 09:00:00'], 'value': range(6)}) df.index = pd.to_datetime(df.pop('datetime'), utc=True) - df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific') + df.index = df.index.tz_convert('US/Pacific') expected = pd.DatetimeIndex(['2011-07-19 
07:00:00', '2011-07-19 08:00:00',