From b846c570f9db9ad30f433c97d4e8415e8dcd4e2d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 31 Aug 2017 23:50:19 -0700 Subject: [PATCH 01/18] Fxi merge --- doc/source/whatsnew/v0.21.0.txt | 36 ++++++++++++++++++++ pandas/core/tools/datetimes.py | 29 ++++++++++++---- pandas/tests/indexes/datetimes/test_tools.py | 32 +++++++++++++++++ pandas/tests/io/test_sql.py | 21 +++++++----- pandas/tests/test_multilevel.py | 3 +- 5 files changed, 105 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 273cbd8357f85..8c95250f8bf3c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -298,6 +298,39 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the pd.MultiIndex.from_tuples([('a',), ('b',)]) +.. _whatsnew_0210.api.utc_localization_with_series: + +UTC Localization with Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + Previous Behavior + + .. ipython:: python + + s = Series(['20130101 00:00:00'] * 10) + + .. code-block:: python + + In [12]: pd.to_datetime(s, utc=True) + Out[12]: + 0 2013-01-01 + 1 2013-01-01 + 2 2013-01-01 + 3 2013-01-01 + 4 2013-01-01 + 5 2013-01-01 + 6 2013-01-01 + 7 2013-01-01 + 8 2013-01-01 + 9 2013-01-01 + dtype: datetime64[ns] + + New Behavior + + .. ipython:: python + + pd.to_datetime(s, utc=True) + .. _whatsnew_0210.api: Other API Changes @@ -363,10 +396,13 @@ Conversion - Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) +<<<<<<< 062f6f118fe4ea439ae255a8ff886a532e20ecdb - Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) +======= +>>>>>>> BUG: to_datetime not localizing Series when utc=True (#6415) Indexing ^^^^^^^^ diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c0f234a36803d..5a547e952f0ed 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -345,6 +345,17 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, tz = 'utc' if utc else None + def _maybe_convert_to_utc(arg, utc): + if utc: + if isinstance(arg, ABCSeries): + arg = arg.dt.tz_localize('UTC') + elif isinstance(arg, DatetimeIndex): + if arg.tz is None: + arg = arg.tz_localize('UTC') + else: + arg = arg.tz_convert('UTC') + return arg + def _convert_listlike(arg, box, format, name=None, tz=tz): if isinstance(arg, (list, tuple)): @@ -364,7 +375,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass - + arg = _maybe_convert_to_utc(arg, utc) return arg elif unit is not None: @@ -383,12 +394,15 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') - + # _ensure_object converts Series to numpy array, need to reconvert + # upon return + arg_is_series = isinstance(arg, ABCSeries) arg = _ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: - format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) + format = _guess_datetime_format_for_array(arg, + dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted @@ -415,7 +429,8 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): # fallback if result is None: try: - result = tslib.array_strptime(arg, format, exact=exact, + result = tslib.array_strptime(arg, format, + exact=exact, errors=errors) except tslib.OutOfBoundsDatetime: if errors == 'raise': @@ -439,9 +454,11 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): yearfirst=yearfirst, require_iso8601=require_iso8601 ) - if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) + # GH 6415 + elif arg_is_series: + result = _maybe_convert_to_utc(Series(result, name=name), utc) return result except ValueError as e: @@ -516,7 +533,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): result = arg elif isinstance(arg, ABCSeries): from pandas import Series - values = _convert_listlike(arg._values, False, format) + values = _convert_listlike(arg, False, format, name=arg.name) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 50669ee357bbd..bee248de591e3 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -270,6 +270,38 @@ def test_to_datetime_utc_is_true(self): expected = pd.DatetimeIndex(data=date_range) tm.assert_index_equal(result, expected) + def test_to_datetime_utc_true_with_series(self): + # GH 6415: UTC=True with Series + data = ['20100102 121314', '20100102 121315'] + expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'), + pd.Timestamp('2010-01-02 12:13:15', tz='utc')] + result = pd.to_datetime(pd.Series(data), + format='%Y%m%d %H%M%S', + utc=True) + expected = pd.Series(expected_data) + tm.assert_series_equal(result, expected) + result = pd.to_datetime(pd.Index(data), + format='%Y%m%d %H%M%S', + utc=True) + expected = pd.DatetimeIndex(expected_data) + tm.assert_index_equal(result, expected) + + # GH 15760 UTC=True with Series + ts = 1.5e18 + result = pd.to_datetime(pd.Series([ts]), utc=True) + expected = pd.Series([pd.Timestamp(ts, tz='utc')]) + tm.assert_series_equal(result, expected) + + test_dates = ['2013-01-01 00:00:00-01:00'] * 10 + expected_data = [pd.Timestamp('20130101 01:00:00', tz='utc')] * 10 + expected = pd.Series(expected_data) + ser = Series(test_dates) + result = pd.to_datetime(ser, utc=True) + tm.assert_series_equal(result, expected) + ser_naive = Series(test_dates, dtype='datetime64[ns]') + result = pd.to_datetime(ser_naive, utc=True) + tm.assert_series_equal(result, expected) + def test_to_datetime_tz_psycopg2(self): # xref 8260 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a7c42391effe6..45c0ed7f25e9d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -606,14 +606,15 @@ def test_date_parsing(self): # No Parsing df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn) assert not issubclass(df.DateCol.dtype.type, np.datetime64) - + # Now that GH 6415 is fixed, dates are automatically parsed to UTC + utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['DateCol']) - assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert issubclass(df.DateCol.dtype.type, utc_dtype) df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) - assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert issubclass(df.DateCol.dtype.type, utc_dtype) df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['IntDateCol']) @@ -631,8 +632,9 @@ def test_date_and_index(self): df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, index_col='DateCol', parse_dates=['DateCol', 'IntDateCol']) - - assert issubclass(df.index.dtype.type, np.datetime64) + # Now that GH 6415 is fixed, dates are automatically parsed to UTC + utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType + assert issubclass(df.index.dtype.type, utc_dtype) assert issubclass(df.IntDateCol.dtype.type, np.datetime64) def test_timedelta(self): @@ -1319,18 +1321,19 @@ def check(col): def test_date_parsing(self): # No Parsing df = sql.read_sql_table("types_test_data", self.conn) - + # Now that GH 6415 is fixed, dates are automatically parsed to UTC + utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType df = sql.read_sql_table("types_test_data", self.conn, parse_dates=['DateCol']) - assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert issubclass(df.DateCol.dtype.type, utc_dtype) df = sql.read_sql_table("types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) - assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert issubclass(df.DateCol.dtype.type, utc_dtype) df = sql.read_sql_table("types_test_data", self.conn, parse_dates={ 'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}}) - assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert issubclass(df.DateCol.dtype.type, utc_dtype) df = sql.read_sql_table( "types_test_data", self.conn, parse_dates=['IntDateCol']) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a765e2c4ca1bf..db486d2172f8a 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2137,7 +2137,8 @@ def test_set_index_datetime(self): '2011-07-19 08:00:00', '2011-07-19 09:00:00'], 'value': range(6)}) df.index = pd.to_datetime(df.pop('datetime'), utc=True) - df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific') + # Removed 'tz_localize('utc') below after GH 6415 was fixed + df.index = df.index.tz_convert('US/Pacific') expected = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', From 4319527fc37a3c02d373a53906788519ef66c764 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 28 Jul 2017 16:25:23 -0700 Subject: [PATCH 02/18] BUG: to_datetime not localizing Series when utc=True (#6415) Modify test case Comment about test edit, move conversion logic to convert_listlike Add new section in whatsnew and update test Alter SQL tests Modify whatsnew and make new wrapper function to handle UTC conversion Simiplified whatsnew and reverted arg renaming --- doc/source/whatsnew/v0.21.0.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 8c95250f8bf3c..05cc2bede2434 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -396,13 +396,10 @@ Conversion - Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) -<<<<<<< 062f6f118fe4ea439ae255a8ff886a532e20ecdb - Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) -======= ->>>>>>> BUG: to_datetime not localizing Series when utc=True (#6415) Indexing ^^^^^^^^ From 9a93811e06a18355ad045fef9aa6bf25ec1f23da Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 Aug 2017 15:31:34 -0700 Subject: [PATCH 03/18] Adjust SQL tests --- pandas/core/tools/datetimes.py | 2 +- pandas/tests/io/test_sql.py | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 5a547e952f0ed..17315ee107705 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -457,7 +457,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) # GH 6415 - elif arg_is_series: + elif arg_is_series and utc: result = _maybe_convert_to_utc(Series(result, name=name), utc) return result diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 45c0ed7f25e9d..341b68dbb4a09 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -602,7 +602,7 @@ def test_execute_sql(self): tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def test_date_parsing(self): - # Test date parsing in read_sq + # Test date parsing in read_sql # No Parsing df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn) assert not issubclass(df.DateCol.dtype.type, np.datetime64) @@ -1248,7 +1248,9 @@ def test_default_date_load(self): # IMPORTANT - sqlite has no native date type, so shouldn't parse, but # MySQL SHOULD be converted. - assert issubclass(df.DateCol.dtype.type, np.datetime64) + # Now that GH 6415 is fixed, dates are automatically parsed to UTC + utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType + assert issubclass(df.DateCol.dtype.type, utc_dtype) def test_datetime_with_timezone(self): # edge case that converts postgresql datetime with time zone types @@ -1333,7 +1335,7 @@ def test_date_parsing(self): df = sql.read_sql_table("types_test_data", self.conn, parse_dates={ 'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}}) - assert issubclass(df.DateCol.dtype.type, utc_dtype) + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_table( "types_test_data", self.conn, parse_dates=['IntDateCol']) @@ -1355,7 +1357,11 @@ def test_datetime(self): # with read_table -> type information from schema used result = sql.read_sql_table('test_datetime', self.conn) result = result.drop('index', axis=1) - tm.assert_frame_equal(result, df) + # After GH 6415, dates outbound from a db will be localized to UTC + # xref GH 7364 + expected = df.copy() + expected['A'] = expected['A'].dt.tz_localize('UTC') + tm.assert_frame_equal(result, expected) # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) @@ -1375,7 +1381,11 @@ def test_datetime_NaT(self): # with read_table -> type information from schema used result = sql.read_sql_table('test_datetime', self.conn) - tm.assert_frame_equal(result, df) + # After GH 6415, dates outbound from a db will be localized to UTC + # xref GH 7364 + expected = df.copy() + expected['A'] = expected['A'].dt.tz_localize('UTC') + tm.assert_frame_equal(result, expected) # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) @@ -1391,8 +1401,8 @@ def test_datetime_date(self): df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) df.to_sql('test_date', self.conn, index=False) res = read_sql_table('test_date', self.conn) - # comes back as datetime64 - tm.assert_series_equal(res['a'], to_datetime(df['a'])) + # GH 6415 comes back as datetime64[ns, UTC] + tm.assert_series_equal(res['a'], to_datetime(df['a'], utc=True)) def test_datetime_time(self): # test support for datetime.time From 76471e9a5a72d785f1e54278158101e739ed9379 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 14 Aug 2017 21:34:39 -0700 Subject: [PATCH 04/18] Add whatsnew note about impact on read_sql --- doc/source/whatsnew/v0.21.0.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 05cc2bede2434..f0fbc7ee141b2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -331,6 +331,8 @@ UTC Localization with Series pd.to_datetime(s, utc=True) +This new behavior will also localize datetime columns in DataFrames returned from :func:`read_sql` which previously returned datetime columns as naive UTC. + .. _whatsnew_0210.api: Other API Changes From eed2d3d20f373359eb250f4ccf60a6ff335258b4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 14 Aug 2017 21:37:33 -0700 Subject: [PATCH 05/18] PEP8 fixes --- pandas/tests/io/test_sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 341b68dbb4a09..82e9e28e5301f 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1357,7 +1357,7 @@ def test_datetime(self): # with read_table -> type information from schema used result = sql.read_sql_table('test_datetime', self.conn) result = result.drop('index', axis=1) - # After GH 6415, dates outbound from a db will be localized to UTC + # After GH 6415, dates outbound from a db will be localized to UTC # xref GH 7364 expected = df.copy() expected['A'] = expected['A'].dt.tz_localize('UTC') @@ -1381,7 +1381,7 @@ def test_datetime_NaT(self): # with read_table -> type information from schema used result = sql.read_sql_table('test_datetime', self.conn) - # After GH 6415, dates outbound from a db will be localized to UTC + # After GH 6415, dates outbound from a db will be localized to UTC # xref GH 7364 expected = df.copy() expected['A'] = expected['A'].dt.tz_localize('UTC') From f56859a36c54114750d312e17eb28647dce9145c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 16 Aug 2017 23:00:46 -0700 Subject: [PATCH 06/18] Have DatetimeIndex handle the UTC conversion for Series and reformat tests --- pandas/core/tools/datetimes.py | 24 +++------------ pandas/tests/indexes/datetimes/test_tools.py | 32 +++++++++++--------- 2 files changed, 22 insertions(+), 34 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 17315ee107705..75f184266b794 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -345,17 +345,6 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, tz = 'utc' if utc else None - def _maybe_convert_to_utc(arg, utc): - if utc: - if isinstance(arg, ABCSeries): - arg = arg.dt.tz_localize('UTC') - elif isinstance(arg, DatetimeIndex): - if arg.tz is None: - arg = arg.tz_localize('UTC') - else: - arg = arg.tz_convert('UTC') - return arg - def _convert_listlike(arg, box, format, name=None, tz=tz): if isinstance(arg, (list, tuple)): @@ -375,7 +364,8 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass - arg = _maybe_convert_to_utc(arg, utc) + + return arg elif unit is not None: @@ -394,9 +384,8 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') - # _ensure_object converts Series to numpy array, need to reconvert - # upon return - arg_is_series = isinstance(arg, ABCSeries) + + arg = _ensure_object(arg) require_iso8601 = False @@ -456,9 +445,6 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): ) if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) - # GH 6415 - elif arg_is_series and utc: - result = _maybe_convert_to_utc(Series(result, name=name), utc) return result except ValueError as e: @@ -533,7 +519,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): result = arg elif isinstance(arg, ABCSeries): from pandas import Series - values = _convert_listlike(arg, False, format, name=arg.name) + values = _convert_listlike(arg, True, format) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index bee248de591e3..12ba59343d5d1 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -269,37 +269,39 @@ def test_to_datetime_utc_is_true(self): result = pd.to_datetime(date_range, utc=True) expected = pd.DatetimeIndex(data=date_range) tm.assert_index_equal(result, expected) - - def test_to_datetime_utc_true_with_series(self): + + @pytest.mark.parametrize("init_constructor, end_constructor, test_method", + [(Index, DatetimeIndex, tm.assert_index_equal), + (Series, Series, tm.assert_series_equal)]) + def test_to_datetime_utc_true_with_series(self, + init_constructor, + end_constructor, + test_method): # GH 6415: UTC=True with Series data = ['20100102 121314', '20100102 121315'] expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'), pd.Timestamp('2010-01-02 12:13:15', tz='utc')] - result = pd.to_datetime(pd.Series(data), - format='%Y%m%d %H%M%S', - utc=True) - expected = pd.Series(expected_data) - tm.assert_series_equal(result, expected) - result = pd.to_datetime(pd.Index(data), + + result = pd.to_datetime(init_constructor(data), format='%Y%m%d %H%M%S', utc=True) - expected = pd.DatetimeIndex(expected_data) - tm.assert_index_equal(result, expected) + expected = end_constructor(expected_data) + test_method(result, expected) + def test_to_datetime_utc_true_with_series_single_value(self): # GH 15760 UTC=True with Series ts = 1.5e18 result = pd.to_datetime(pd.Series([ts]), utc=True) expected = pd.Series([pd.Timestamp(ts, tz='utc')]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("dtype", [None, 'datetime64[ns]']) + def test_to_datetime_utc_true_with_naive_series(self, dtype): test_dates = ['2013-01-01 00:00:00-01:00'] * 10 + ser = pd.Series(test_dates, dtype=dtype) + result = pd.to_datetime(ser, utc=True) expected_data = [pd.Timestamp('20130101 01:00:00', tz='utc')] * 10 expected = pd.Series(expected_data) - ser = Series(test_dates) - result = pd.to_datetime(ser, utc=True) - tm.assert_series_equal(result, expected) - ser_naive = Series(test_dates, dtype='datetime64[ns]') - result = pd.to_datetime(ser_naive, utc=True) tm.assert_series_equal(result, expected) def test_to_datetime_tz_psycopg2(self): From d069b058ae17bd832b812eed514e540e6841cf3c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 22 Aug 2017 22:19:58 -0700 Subject: [PATCH 07/18] Address whatsnew changes & fix sql tests --- doc/source/whatsnew/v0.21.0.txt | 4 +++- pandas/io/sql.py | 16 ++++++++------ pandas/tests/io/test_sql.py | 37 +++++++++++++-------------------- 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f0fbc7ee141b2..c12e7a72b57eb 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -303,6 +303,8 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the UTC Localization with Series ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, :func:`to_datetime` did not localize datetime ``Series`` data as when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize `Series` with a `datetime64[ns, UTC]` data type. (:issue:`6415`). + Previous Behavior .. ipython:: python @@ -331,7 +333,7 @@ UTC Localization with Series pd.to_datetime(s, utc=True) -This new behavior will also localize datetime columns in DataFrames returned from :func:`read_sql` which previously returned datetime columns as naive UTC. +Additionally, DataFrames with datetime columns returned by :func:`read_sql` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. .. _whatsnew_0210.api: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9aa47e5c69850..6aef9e367bfb2 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -99,24 +99,24 @@ def _convert_params(sql, params): return args -def _handle_date_column(col, format=None): +def _handle_date_column(col, utc=None, format=None): if isinstance(format, dict): return to_datetime(col, errors='ignore', **format) else: if format in ['D', 's', 'ms', 'us', 'ns']: - return to_datetime(col, errors='coerce', unit=format, utc=True) + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif (issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer)): # parse dates as timestamp format = 's' if format is None else format - return to_datetime(col, errors='coerce', unit=format, utc=True) + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif is_datetime64tz_dtype(col): # coerce to UTC timezone # GH11216 return (to_datetime(col, errors='coerce') .astype('datetime64[ns, UTC]')) else: - return to_datetime(col, errors='coerce', format=format, utc=True) + return to_datetime(col, errors='coerce', format=format, utc=utc) def _parse_date_columns(data_frame, parse_dates): @@ -818,10 +818,14 @@ def _harmonize_columns(self, parse_dates=None): df_col = self.frame[col_name] # the type the dataframe column should have col_type = self._get_dtype(sql_col.type) - if (col_type is datetime or col_type is date or col_type is DatetimeTZDtype): - self.frame[col_name] = _handle_date_column(df_col) + if col_type is DatetimeTZDtype: + # Convert SQL Datetime columns with tz to UTC + self.frame[col_name] = _handle_date_column(df_col, + utc=True) + else: + self.frame[col_name] = _handle_date_column(df_col) elif col_type is float: # floats support NA, can always convert! diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 82e9e28e5301f..a4b5a83b36a5b 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -606,15 +606,14 @@ def test_date_parsing(self): # No Parsing df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn) assert not issubclass(df.DateCol.dtype.type, np.datetime64) - # Now that GH 6415 is fixed, dates are automatically parsed to UTC - utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType + df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['DateCol']) - assert issubclass(df.DateCol.dtype.type, utc_dtype) + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) - assert issubclass(df.DateCol.dtype.type, utc_dtype) + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['IntDateCol']) @@ -632,9 +631,8 @@ def test_date_and_index(self): df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, index_col='DateCol', parse_dates=['DateCol', 'IntDateCol']) - # Now that GH 6415 is fixed, dates are automatically parsed to UTC - utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType - assert issubclass(df.index.dtype.type, utc_dtype) + + assert issubclass(df.index.dtype.type, np.datetime64) assert issubclass(df.IntDateCol.dtype.type, np.datetime64) def test_timedelta(self): @@ -1323,15 +1321,14 @@ def check(col): def test_date_parsing(self): # No Parsing df = sql.read_sql_table("types_test_data", self.conn) - # Now that GH 6415 is fixed, dates are automatically parsed to UTC - utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType + df = sql.read_sql_table("types_test_data", self.conn, parse_dates=['DateCol']) - assert issubclass(df.DateCol.dtype.type, utc_dtype) + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_table("types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) - assert issubclass(df.DateCol.dtype.type, utc_dtype) + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_table("types_test_data", self.conn, parse_dates={ 'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}}) @@ -1357,11 +1354,7 @@ def test_datetime(self): # with read_table -> type information from schema used result = sql.read_sql_table('test_datetime', self.conn) result = result.drop('index', axis=1) - # After GH 6415, dates outbound from a db will be localized to UTC - # xref GH 7364 - expected = df.copy() - expected['A'] = expected['A'].dt.tz_localize('UTC') - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, df) # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) @@ -1381,11 +1374,7 @@ def test_datetime_NaT(self): # with read_table -> type information from schema used result = sql.read_sql_table('test_datetime', self.conn) - # After GH 6415, dates outbound from a db will be localized to UTC - # xref GH 7364 - expected = df.copy() - expected['A'] = expected['A'].dt.tz_localize('UTC') - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, df) # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) @@ -1401,8 +1390,10 @@ def test_datetime_date(self): df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) df.to_sql('test_date', self.conn, index=False) res = read_sql_table('test_date', self.conn) - # GH 6415 comes back as datetime64[ns, UTC] - tm.assert_series_equal(res['a'], to_datetime(df['a'], utc=True)) + expected = res['a'] + result = to_datetime(df['a']) + # comes back as datetime64 + tm.assert_series_equal(result, expected) def test_datetime_time(self): # test support for datetime.time From f24b82c9aeff21d07b167572c0d1cf62c0912a7a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 22 Aug 2017 22:35:00 -0700 Subject: [PATCH 08/18] Undo formatting --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/tools/datetimes.py | 9 +++------ pandas/io/sql.py | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c12e7a72b57eb..402e6bbea9c51 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -405,6 +405,7 @@ Conversion - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) + Indexing ^^^^^^^^ diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 75f184266b794..2694ca3e8463b 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -365,7 +365,6 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): except ValueError: pass - return arg elif unit is not None: @@ -385,13 +384,11 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') - arg = _ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: - format = _guess_datetime_format_for_array(arg, - dayfirst=dayfirst) + format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted @@ -418,8 +415,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): # fallback if result is None: try: - result = tslib.array_strptime(arg, format, - exact=exact, + result = tslib.array_strptime(arg, format, exact=exact, errors=errors) except tslib.OutOfBoundsDatetime: if errors == 'raise': @@ -443,6 +439,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): yearfirst=yearfirst, require_iso8601=require_iso8601 ) + if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) return result diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 6aef9e367bfb2..29c6b75b183c6 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -821,7 +821,7 @@ def _harmonize_columns(self, parse_dates=None): if (col_type is datetime or col_type is date or col_type is DatetimeTZDtype): if col_type is DatetimeTZDtype: - # Convert SQL Datetime columns with tz to UTC + # Convert tz-aware Datetime SQL columns to UTC self.frame[col_name] = _handle_date_column(df_col, utc=True) else: From 808a81007b67df792cd55e48b457aac8e5e47021 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 22 Aug 2017 22:38:02 -0700 Subject: [PATCH 09/18] Revert SQL test --- pandas/tests/io/test_sql.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a4b5a83b36a5b..1678389ea6fb5 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1246,9 +1246,7 @@ def test_default_date_load(self): # IMPORTANT - sqlite has no native date type, so shouldn't parse, but # MySQL SHOULD be converted. - # Now that GH 6415 is fixed, dates are automatically parsed to UTC - utc_dtype = pd.core.dtypes.dtypes.DatetimeTZDtypeType - assert issubclass(df.DateCol.dtype.type, utc_dtype) + assert issubclass(df.DateCol.dtype.type, np.datetime64) def test_datetime_with_timezone(self): # edge case that converts postgresql datetime with time zone types From c142975bbdd38cd30debf502c7570d9489811bb3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 24 Aug 2017 00:12:00 -0700 Subject: [PATCH 10/18] Address comments and confirm timezone tests --- doc/source/whatsnew/v0.21.0.txt | 11 ++--------- pandas/core/tools/datetimes.py | 2 +- pandas/io/sql.py | 1 + pandas/tests/indexes/datetimes/test_tools.py | 10 ++++++---- pandas/tests/io/test_sql.py | 11 +++++++++-- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 402e6bbea9c51..67dc48ff2502c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -303,13 +303,13 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the UTC Localization with Series ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, :func:`to_datetime` did not localize datetime ``Series`` data as when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize `Series` with a `datetime64[ns, UTC]` data type. (:issue:`6415`). +Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize `Series` with a `datetime64[ns, UTC]` data type to be consistent with how list-like and Index data are handled. (:issue:`6415`). Previous Behavior .. ipython:: python - s = Series(['20130101 00:00:00'] * 10) + s = Series(['20130101 00:00:00'] * 3) .. code-block:: python @@ -318,13 +318,6 @@ Previously, :func:`to_datetime` did not localize datetime ``Series`` data as whe 0 2013-01-01 1 2013-01-01 2 2013-01-01 - 3 2013-01-01 - 4 2013-01-01 - 5 2013-01-01 - 6 2013-01-01 - 7 2013-01-01 - 8 2013-01-01 - 9 2013-01-01 dtype: datetime64[ns] New Behavior diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 2694ca3e8463b..9ff0275a7c370 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -516,7 +516,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): result = arg elif isinstance(arg, ABCSeries): from pandas import Series - values = _convert_listlike(arg, True, format) + values = _convert_listlike(arg._values, True, format) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 29c6b75b183c6..4c3129fe745d4 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -818,6 +818,7 @@ def _harmonize_columns(self, parse_dates=None): df_col = self.frame[col_name] # the type the dataframe column should have col_type = self._get_dtype(sql_col.type) + if (col_type is datetime or col_type is date or col_type is DatetimeTZDtype): if col_type is DatetimeTZDtype: diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 12ba59343d5d1..7b0ba1f4c71d2 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -272,11 +272,13 @@ def test_to_datetime_utc_is_true(self): @pytest.mark.parametrize("init_constructor, end_constructor, test_method", [(Index, DatetimeIndex, tm.assert_index_equal), + (list, DatetimeIndex, tm.assert_index_equal), + (np.array, DatetimeIndex, tm.assert_index_equal), (Series, Series, tm.assert_series_equal)]) - def test_to_datetime_utc_true_with_series(self, - init_constructor, - end_constructor, - test_method): + def test_to_datetime_utc_true_with_constructors(self, + init_constructor, + end_constructor, + test_method): # GH 6415: UTC=True with Series data = ['20100102 121314', '20100102 121315'] expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'), diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 1678389ea6fb5..0bdf13226fad5 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1277,6 +1277,13 @@ def check(col): # "2000-06-01 07:00:00" assert col[1] == Timestamp('2000-06-01 07:00:00', tz='UTC') + # Double check that the Series has been localized correctly + # GH 6415 + expected_data = [Timestamp('2000-01-01 08:00:00', tz='UTC'), + Timestamp('2000-06-01 07:00:00', tz='UTC')] + expected = Series(expected_data) + tm.assert_series_equal(col, expected) + else: raise AssertionError("DateCol loaded with incorrect type " "-> {0}".format(col.dtype)) @@ -1388,8 +1395,8 @@ def test_datetime_date(self): df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) df.to_sql('test_date', self.conn, index=False) res = read_sql_table('test_date', self.conn) - expected = res['a'] - result = to_datetime(df['a']) + result = res['a'] + expected = to_datetime(df['a']) # comes back as datetime64 tm.assert_series_equal(result, expected) From e8f5d4e53efeba59cceab5347720498426bd25b9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 24 Aug 2017 10:04:16 -0700 Subject: [PATCH 11/18] Ensure expected name matches --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 0bdf13226fad5..d10c16322382b 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1281,7 +1281,7 @@ def check(col): # GH 6415 expected_data = [Timestamp('2000-01-01 08:00:00', tz='UTC'), Timestamp('2000-06-01 07:00:00', tz='UTC')] - expected = Series(expected_data) + expected = Series(expected_data, name=col.name) tm.assert_series_equal(col, expected) else: From 4d17722a4a190cdbfbd08b30836f912010f2e3d0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 24 Aug 2017 12:30:04 -0700 Subject: [PATCH 12/18] Fix linting errors --- pandas/io/sql.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 4c3129fe745d4..de3929bac6cfb 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -823,7 +823,7 @@ def _harmonize_columns(self, parse_dates=None): col_type is DatetimeTZDtype): if col_type is DatetimeTZDtype: # Convert tz-aware Datetime SQL columns to UTC - self.frame[col_name] = _handle_date_column(df_col, + self.frame[col_name] = _handle_date_column(df_col, utc=True) else: self.frame[col_name] = _handle_date_column(df_col) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 7b0ba1f4c71d2..fe1b75e00628d 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -269,15 +269,15 @@ def test_to_datetime_utc_is_true(self): result = pd.to_datetime(date_range, utc=True) expected = pd.DatetimeIndex(data=date_range) tm.assert_index_equal(result, expected) - + @pytest.mark.parametrize("init_constructor, end_constructor, test_method", - [(Index, DatetimeIndex, tm.assert_index_equal), - (list, DatetimeIndex, tm.assert_index_equal), - (np.array, DatetimeIndex, tm.assert_index_equal), - (Series, Series, tm.assert_series_equal)]) - def test_to_datetime_utc_true_with_constructors(self, - init_constructor, - end_constructor, + [(Index, DatetimeIndex, tm.assert_index_equal), + (list, DatetimeIndex, tm.assert_index_equal), + (np.array, DatetimeIndex, tm.assert_index_equal), + (Series, Series, tm.assert_series_equal)]) + def test_to_datetime_utc_true_with_constructors(self, + init_constructor, + end_constructor, test_method): # GH 6415: UTC=True with Series data = ['20100102 121314', '20100102 121315'] From 9e9b89e281fc466f54fe9d45b707f992df1e0d89 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 25 Aug 2017 11:31:14 -0700 Subject: [PATCH 13/18] Whatsnew and tests fixups --- doc/source/whatsnew/v0.21.0.txt | 4 ++-- pandas/tests/indexes/datetimes/test_tools.py | 17 ++++++----------- pandas/tests/io/test_sql.py | 13 +++++-------- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 67dc48ff2502c..4063a2a95bbd2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -311,7 +311,7 @@ Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ` s = Series(['20130101 00:00:00'] * 3) - .. code-block:: python + .. code-block:: ipython In [12]: pd.to_datetime(s, utc=True) Out[12]: @@ -326,7 +326,7 @@ Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ` pd.to_datetime(s, utc=True) -Additionally, DataFrames with datetime columns returned by :func:`read_sql` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. +Additionally, DataFrames with datetime columns returned by :func:`read_sql_table` and :func:`read_sql_query` with the `parse_dates` argument specified will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. .. _whatsnew_0210.api: diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index fe1b75e00628d..486ddf1dc1ac1 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -260,16 +260,6 @@ def test_to_datetime_tz_pytz(self): dtype='datetime64[ns, UTC]', freq=None) tm.assert_index_equal(result, expected) - def test_to_datetime_utc_is_true(self): - # See gh-11934 - start = pd.Timestamp('2014-01-01', tz='utc') - end = pd.Timestamp('2014-01-03', tz='utc') - date_range = pd.bdate_range(start, end) - - result = pd.to_datetime(date_range, utc=True) - expected = pd.DatetimeIndex(data=date_range) - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("init_constructor, end_constructor, test_method", [(Index, DatetimeIndex, tm.assert_index_equal), (list, DatetimeIndex, tm.assert_index_equal), @@ -279,7 +269,7 @@ def test_to_datetime_utc_true_with_constructors(self, init_constructor, end_constructor, test_method): - # GH 6415: UTC=True with Series + # See gh-11934 & gh-6415 data = ['20100102 121314', '20100102 121315'] expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'), pd.Timestamp('2010-01-02 12:13:15', tz='utc')] @@ -290,6 +280,11 @@ def test_to_datetime_utc_true_with_constructors(self, expected = end_constructor(expected_data) test_method(result, expected) + # Test scalar case as well + for scalar, expected in zip(data, expected_data): + result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True) + assert result == expected + def test_to_datetime_utc_true_with_series_single_value(self): # GH 15760 UTC=True with Series ts = 1.5e18 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d10c16322382b..78fb31d819c2d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1271,13 +1271,8 @@ def check(col): # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" - assert col[0] == Timestamp('2000-01-01 08:00:00', tz='UTC') - # "2000-06-01 00:00:00-07:00" should convert to # "2000-06-01 07:00:00" - assert col[1] == Timestamp('2000-06-01 07:00:00', tz='UTC') - - # Double check that the Series has been localized correctly # GH 6415 expected_data = [Timestamp('2000-01-01 08:00:00', tz='UTC'), Timestamp('2000-06-01 07:00:00', tz='UTC')] @@ -1305,6 +1300,9 @@ def check(col): self.conn, parse_dates=['DateColWithTz']) if not hasattr(df, 'DateColWithTz'): pytest.skip("no column with datetime with time zone") + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == 'UTC' check(df.DateColWithTz) df = pd.concat(list(pd.read_sql_query("select * from types_test_data", @@ -1314,9 +1312,8 @@ def check(col): assert is_datetime64tz_dtype(col.dtype) assert str(col.dt.tz) == 'UTC' expected = sql.read_sql_table("types_test_data", self.conn) - tm.assert_series_equal(df.DateColWithTz, - expected.DateColWithTz - .astype('datetime64[ns, UTC]')) + # Removed ".astype('datetime64[ns, UTC]')"after GH 6415 was fixed + tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz) # xref #7139 # this might or might not be converted depending on the postgres driver From d466fd20e3f5468edd9739c3307ef3c9b1eeaf56 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 28 Aug 2017 20:33:31 -0700 Subject: [PATCH 14/18] Address whatsnew and test comments --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 18 +++++++++++------- pandas/tests/io/test_sql.py | 2 ++ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4063a2a95bbd2..a1ef9cfd17426 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -326,7 +326,7 @@ Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ` pd.to_datetime(s, utc=True) -Additionally, DataFrames with datetime columns returned by :func:`read_sql_table` and :func:`read_sql_query` with the `parse_dates` argument specified will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. +Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. .. _whatsnew_0210.api: diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 486ddf1dc1ac1..32f5a7215fb91 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -265,10 +265,10 @@ def test_to_datetime_tz_pytz(self): (list, DatetimeIndex, tm.assert_index_equal), (np.array, DatetimeIndex, tm.assert_index_equal), (Series, Series, tm.assert_series_equal)]) - def test_to_datetime_utc_true_with_constructors(self, - init_constructor, - end_constructor, - test_method): + def test_to_datetime_utc_true(self, + init_constructor, + end_constructor, + test_method): # See gh-11934 & gh-6415 data = ['20100102 121314', '20100102 121315'] expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'), @@ -292,9 +292,13 @@ def test_to_datetime_utc_true_with_series_single_value(self): expected = pd.Series([pd.Timestamp(ts, tz='utc')]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("dtype", [None, 'datetime64[ns]']) - def test_to_datetime_utc_true_with_naive_series(self, dtype): - test_dates = ['2013-01-01 00:00:00-01:00'] * 10 + @pytest.mark.parametrize("data, dtype", + [('2013-01-01 00:00:00-01:00', None), + ('2013-01-01 00:00:00-01:00','datetime64[ns]'), + ('2013-01-01 01:00:00', None), + ('2013-01-01 01:00:00', 'datetime64[ns]')]) + def test_to_datetime_utc_true_with_naive_dtype_series(self, data, dtype): + test_dates = [data] * 10 ser = pd.Series(test_dates, dtype=dtype) result = pd.to_datetime(ser, utc=True) expected_data = [pd.Timestamp('20130101 01:00:00', tz='utc')] * 10 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 78fb31d819c2d..d7a09cc16f5b0 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1312,6 +1312,8 @@ def check(col): assert is_datetime64tz_dtype(col.dtype) assert str(col.dt.tz) == 'UTC' expected = sql.read_sql_table("types_test_data", self.conn) + col = expected.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) # Removed ".astype('datetime64[ns, UTC]')"after GH 6415 was fixed tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz) From 177b3fe6c34033f4ce50a8022bce7d863d09d9f6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 28 Aug 2017 20:37:30 -0700 Subject: [PATCH 15/18] Fix linting errors --- pandas/tests/indexes/datetimes/test_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 32f5a7215fb91..0b4506b654fbb 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -293,8 +293,8 @@ def test_to_datetime_utc_true_with_series_single_value(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize("data, dtype", - [('2013-01-01 00:00:00-01:00', None), - ('2013-01-01 00:00:00-01:00','datetime64[ns]'), + [('2013-01-01 00:00:00-01:00', None), + ('2013-01-01 00:00:00-01:00', 'datetime64[ns]'), ('2013-01-01 01:00:00', None), ('2013-01-01 01:00:00', 'datetime64[ns]')]) def test_to_datetime_utc_true_with_naive_dtype_series(self, data, dtype): From 3407830af0826fb175384b0b82ec6e15ae04e483 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 31 Aug 2017 23:46:07 -0700 Subject: [PATCH 16/18] Fix tests and whatnew notes --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/io/sql.py | 10 +++----- pandas/tests/indexes/datetimes/test_tools.py | 25 +++++++++++--------- pandas/tests/io/test_sql.py | 1 - pandas/tests/test_multilevel.py | 1 - 5 files changed, 18 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a1ef9cfd17426..d39eee97d4726 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -303,7 +303,7 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the UTC Localization with Series ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize `Series` with a `datetime64[ns, UTC]` data type to be consistent with how list-like and Index data are handled. (:issue:`6415`). +Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` dtype to be consistent with how list-like and `Index` data are handled. (:issue:`6415`). Previous Behavior diff --git a/pandas/io/sql.py b/pandas/io/sql.py index de3929bac6cfb..9c6d01d236c57 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -821,13 +821,9 @@ def _harmonize_columns(self, parse_dates=None): if (col_type is datetime or col_type is date or col_type is DatetimeTZDtype): - if col_type is DatetimeTZDtype: - # Convert tz-aware Datetime SQL columns to UTC - self.frame[col_name] = _handle_date_column(df_col, - utc=True) - else: - self.frame[col_name] = _handle_date_column(df_col) - + # Convert tz-aware Datetime SQL columns to UTC + utc = col_type is DatetimeTZDtype + self.frame[col_name] = _handle_date_column(df_col, utc=utc) elif col_type is float: # floats support NA, can always convert! self.frame[col_name] = df_col.astype(col_type, copy=False) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 0b4506b654fbb..089d74a1d69b8 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -292,17 +292,20 @@ def test_to_datetime_utc_true_with_series_single_value(self): expected = pd.Series([pd.Timestamp(ts, tz='utc')]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("data, dtype", - [('2013-01-01 00:00:00-01:00', None), - ('2013-01-01 00:00:00-01:00', 'datetime64[ns]'), - ('2013-01-01 01:00:00', None), - ('2013-01-01 01:00:00', 'datetime64[ns]')]) - def test_to_datetime_utc_true_with_naive_dtype_series(self, data, dtype): - test_dates = [data] * 10 - ser = pd.Series(test_dates, dtype=dtype) - result = pd.to_datetime(ser, utc=True) - expected_data = [pd.Timestamp('20130101 01:00:00', tz='utc')] * 10 - expected = pd.Series(expected_data) + def test_to_datetime_utc_true_with_series_tzaware_string(self): + ts = '2013-01-01 00:00:00-01:00' + expected_ts = '2013-01-01 01:00:00' + data = pd.Series([ts] * 3) + result = pd.to_datetime(data, utc=True) + expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('date, dtype', + [('2013-01-01 01:00:00', 'datetime64[ns]'), + ('2013-01-01 01:00:00', 'datetime64[ns, UTC]')]) + def test_to_datetime_utc_true_with_series_datetime_ns(self, date, dtype): + expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')]) + result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True) tm.assert_series_equal(result, expected) def test_to_datetime_tz_psycopg2(self): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d7a09cc16f5b0..93eb0ff0ac1f2 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1314,7 +1314,6 @@ def check(col): expected = sql.read_sql_table("types_test_data", self.conn) col = expected.DateColWithTz assert is_datetime64tz_dtype(col.dtype) - # Removed ".astype('datetime64[ns, UTC]')"after GH 6415 was fixed tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz) # xref #7139 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index db486d2172f8a..6976fe162c5d5 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2137,7 +2137,6 @@ def test_set_index_datetime(self): '2011-07-19 08:00:00', '2011-07-19 09:00:00'], 'value': range(6)}) df.index = pd.to_datetime(df.pop('datetime'), utc=True) - # Removed 'tz_localize('utc') below after GH 6415 was fixed df.index = df.index.tz_convert('US/Pacific') expected = pd.DatetimeIndex(['2011-07-19 07:00:00', From f9248adb0a02f7b7a103de2fbde792fd1efae7ae Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 31 Aug 2017 23:57:50 -0700 Subject: [PATCH 17/18] Remove extra linebreak --- doc/source/whatsnew/v0.21.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d39eee97d4726..d27f40bf60278 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -398,7 +398,6 @@ Conversion - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) - Indexing ^^^^^^^^ From 5eb7d9cf1349a1df786732fda04aa75f76b2f46c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 31 Aug 2017 23:59:56 -0700 Subject: [PATCH 18/18] Double tick Index --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d27f40bf60278..e0963a1908bbc 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -303,7 +303,7 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the UTC Localization with Series ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` dtype to be consistent with how list-like and `Index` data are handled. (:issue:`6415`). +Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` dtype to be consistent with how list-like and ``Index`` data are handled. (:issue:`6415`). Previous Behavior