From ac5a3d166b1021db9aaed9828a8e0a9ff98ffdf0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 7 Jul 2018 18:07:46 -0500 Subject: [PATCH 01/29] BUG: to_datetime no longer converts offsets to UTC --- pandas/_libs/tslib.pyx | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c1a0e58a4fb1a..86ed403eeb3a5 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -465,12 +465,16 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' _TSObject _ts - int out_local=0, out_tzoffset=0 + #int out_local=0, out_tzoffset=0 + ndarray[int] out_local + ndarray[int] out_tzoffset # specify error conditions assert is_raise or is_ignore or is_coerce try: + out_local = np.zeros(n, dtype=np.int64) + out_tzoffset = np.empty(n, dtype=int) result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') for i in range(n): @@ -562,7 +566,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', val = val.encode('utf-8') try: - _string_to_dts(val, &dts, &out_local, &out_tzoffset) + _string_to_dts(val, &dts, &out_local[i], &out_tzoffset[i]) except ValueError: # A ValueError at this point is a _parsing_ error # specifically _not_ OutOfBoundsDatetime @@ -607,8 +611,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # No error raised by string_to_dts, pick back up # where we left off value = dtstruct_to_dt64(&dts) - if out_local == 1: - tz = pytz.FixedOffset(out_tzoffset) + if out_local[i] == 1: + tz = pytz.FixedOffset(out_tzoffset[i]) value = tz_convert_single(value, tz, 'UTC') iresult[i] = value try: @@ -651,6 +655,11 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', else: raise TypeError + if not (out_tzoffset[0] == out_tzoffset).all(): + # GH 17697 + # If the user passed datetime strings with different UTC offsets, then force down + # the path where we return an array of objects + raise ValueError return result except OutOfBoundsDatetime: if is_raise: From 6bf46a866c2169951d3d30c027fcf2a4d864d6f7 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 8 Jul 2018 11:38:25 -0500 Subject: [PATCH 02/29] Document and now return offset --- pandas/_libs/tslib.pyx | 69 +++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 86ed403eeb3a5..ccd34cc3b50f4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -451,9 +451,22 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', dayfirst=False, yearfirst=False, format=None, utc=None, require_iso8601=False): + """ + Converts a 1D array of date-like values to a numpy array of either: + 1) datetime64[ns] data + 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError is encountered + + Also returns a pytz.FixedOffset if an array of strings with the same timezone offset if passed + + Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric, strings + + Returns + ------- + (ndarray, timezone offset) + """ cdef: Py_ssize_t i, n = len(values) - object val, py_dt + object val, py_dt, tz, tz_out = None ndarray[int64_t] iresult ndarray[object] oresult pandas_datetimestruct dts @@ -461,20 +474,23 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', bint seen_integer = 0 bint seen_string = 0 bint seen_datetime = 0 + bint seen_datetime_offset = 0 bint is_raise = errors=='raise' bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' _TSObject _ts - #int out_local=0, out_tzoffset=0 - ndarray[int] out_local - ndarray[int] out_tzoffset + int out_local=0, out_tzoffset=0 + # Can't directly create a ndarray[int] out_local, since most np.array constructors expect + # a long dtype, while _string_to_dts expectes purely int, maybe something I am missing? + ndarray[int64_t] out_local_values + ndarray[int64_t] out_tzoffset_values # specify error conditions assert is_raise or is_ignore or is_coerce try: - out_local = np.zeros(n, dtype=np.int64) - out_tzoffset = np.empty(n, dtype=int) + out_local_values = np.empty(n, dtype=np.int64) + out_tzoffset_values = np.empty(n, dtype=np.int64) result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') for i in range(n): @@ -566,7 +582,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', val = val.encode('utf-8') try: - _string_to_dts(val, &dts, &out_local[i], &out_tzoffset[i]) + _string_to_dts(val, &dts, &out_local, &out_tzoffset) except ValueError: # A ValueError at this point is a _parsing_ error # specifically _not_ OutOfBoundsDatetime @@ -582,7 +598,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise ValueError("time data {val} doesn't match " "format specified" .format(val=val)) - return values + return values, tz_out try: py_dt = parse_datetime_string(val, dayfirst=dayfirst, @@ -610,9 +626,12 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', else: # No error raised by string_to_dts, pick back up # where we left off + out_tzoffset_values[i] = out_tzoffset + out_local_values[i] = out_local value = dtstruct_to_dt64(&dts) - if out_local[i] == 1: - tz = pytz.FixedOffset(out_tzoffset[i]) + if out_local == 1: + seen_datetime_offset = 1 + tz = pytz.FixedOffset(out_tzoffset) value = tz_convert_single(value, tz, 'UTC') iresult[i] = value try: @@ -629,7 +648,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise ValueError("time data {val} doesn't " "match format specified" .format(val=val)) - return values + return values, tz_out raise else: @@ -655,12 +674,21 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', else: raise TypeError - if not (out_tzoffset[0] == out_tzoffset).all(): + if seen_datetime_offset: # GH 17697 - # If the user passed datetime strings with different UTC offsets, then force down - # the path where we return an array of objects - raise ValueError - return result + # 1) If all the offsets are equal, then return 1, pytz.FixedOffset for the + # parsed dates so it can behave nicely with DatetimeIndex + # 2) If the offsets are different, then force the parsing down the object path + # where an array of datetimes (with individual datutil.tzoffsets) are returned + + # Faster to compare integers than to compare objects + is_same_offsets = (out_tzoffset_values[0] == out_tzoffset_values).all() + if not is_same_offsets: + raise TypeError + else: + tz_out = pytz.FixedOffset(out_tzoffset_values[0]) + + return result, tz_out except OutOfBoundsDatetime: if is_raise: raise @@ -682,7 +710,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', oresult[i] = val.item() else: oresult[i] = val - return oresult + return oresult, tz_out except TypeError: oresult = np.empty(n, dtype=object) @@ -704,14 +732,13 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', except Exception: if is_raise: raise - return values - # oresult[i] = val + return values, tz_out else: if is_raise: raise - return values + return values, tz_out - return oresult + return oresult, tz_out cdef inline bint _parse_today_now(str val, int64_t* iresult): From 678b337b80b2f9d8f4b2662376d0f6f9b846274e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 8 Jul 2018 13:55:03 -0500 Subject: [PATCH 03/29] Add some tests, start converting some existing uses of array_to_datetime --- pandas/_libs/tslib.pyx | 2 +- pandas/core/dtypes/cast.py | 2 +- pandas/core/tools/datetimes.py | 4 +- pandas/tests/tslibs/test_array_to_datetime.py | 37 +++++++++++-------- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ccd34cc3b50f4..ca40315b3df9b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -676,7 +676,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if seen_datetime_offset: # GH 17697 - # 1) If all the offsets are equal, then return 1, pytz.FixedOffset for the + # 1) If all the offsets are equal, then return one pytz.FixedOffset for the # parsed dates so it can behave nicely with DatetimeIndex # 2) If the offsets are different, then force the parsing down the object path # where an array of datetimes (with individual datutil.tzoffsets) are returned diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 2cd8144e43cea..c056fe8dd82bf 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -917,7 +917,7 @@ def try_datetime(v): # GH19671 v = tslib.array_to_datetime(v, require_iso8601=True, - errors='raise') + errors='raise')[0] except ValueError: # we might have a sequence of the same-datetimes with tz's diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a99c913f95e82..5ad66907ec973 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -266,7 +266,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, result = arg if result is None and (format is None or infer_datetime_format): - result = tslib.array_to_datetime( + result, tz_parsed = tslib.array_to_datetime( arg, errors=errors, utc=tz == 'utc', @@ -696,7 +696,7 @@ def calc(carg): parsed = parsing.try_parse_year_month_day(carg / 10000, carg / 100 % 100, carg % 100) - return tslib.array_to_datetime(parsed, errors=errors) + return tslib.array_to_datetime(parsed, errors=errors)[0] def calc_with_mask(carg, mask): result = np.empty(carg.shape, dtype='M8[ns]') diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index eb77e52e7c91d..c277d8652b40e 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -3,6 +3,7 @@ import numpy as np import pytest +import pytz from pandas._libs import tslib from pandas.compat.numpy import np_array_datetime64_compat @@ -52,7 +53,7 @@ def test_parsers_iso8601_invalid(self, date_str): class TestArrayToDatetime(object): def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) - result = tslib.array_to_datetime(arr) + result = tslib.array_to_datetime(arr)[0] expected = ['2013-01-01T00:00:00.000000000-0000', '2013-01-02T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( @@ -60,38 +61,42 @@ def test_parsing_valid_dates(self): np_array_datetime64_compat(expected, dtype='M8[ns]')) arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) - result = tslib.array_to_datetime(arr) + result = tslib.array_to_datetime(arr)[0] expected = ['2013-09-16T00:00:00.000000000-0000', '2013-09-17T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype='M8[ns]')) - @pytest.mark.parametrize('dt_string', [ - '01-01-2013 08:00:00+08:00', - '2013-01-01T08:00:00.000000000+0800', - '2012-12-31T16:00:00.000000000-0800', - '12-31-2012 23:00:00-01:00']) - def test_parsing_timezone_offsets(self, dt_string): + @pytest.mark.parametrize('dt_string, expected_tz', [ + ['01-01-2013 08:00:00+08:00', None], + ['2013-01-01T08:00:00.000000000+0800', pytz.FixedOffset(480)], + ['2012-12-31T16:00:00.000000000-0800', pytz.FixedOffset(-480)], + ['12-31-2012 23:00:00-01:00', None]]) + def test_parsing_timezone_offsets(self, dt_string, expected_tz): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added + + # TODO: Appears that the dateparser doesnt return offset info if string is non-ISO + # maybe something in the np_datetime_strings parser is not catching this? arr = np.array(['01-01-2013 00:00:00'], dtype=object) - expected = tslib.array_to_datetime(arr) + expected = tslib.array_to_datetime(arr)[0] arr = np.array([dt_string], dtype=object) - result = tslib.array_to_datetime(arr) + result, result_tz = tslib.array_to_datetime(arr) tm.assert_numpy_array_equal(result, expected) + assert result_tz is expected_tz def test_number_looking_strings_not_into_datetime(self): # GH#4601 # These strings don't look like datetimes so they shouldn't be # attempted to be converted arr = np.array(['-352.737091', '183.575577'], dtype=object) - result = tslib.array_to_datetime(arr, errors='ignore') + result = tslib.array_to_datetime(arr, errors='ignore')[0] tm.assert_numpy_array_equal(result, arr) arr = np.array(['1', '2', '3', '4', '5'], dtype=object) - result = tslib.array_to_datetime(arr, errors='ignore') + result = tslib.array_to_datetime(arr, errors='ignore')[0] tm.assert_numpy_array_equal(result, arr) @pytest.mark.parametrize('invalid_date', [ @@ -105,13 +110,13 @@ def test_coerce_outside_ns_bounds(self, invalid_date): with pytest.raises(ValueError): tslib.array_to_datetime(arr, errors='raise') - result = tslib.array_to_datetime(arr, errors='coerce') + result = tslib.array_to_datetime(arr, errors='coerce')[0] expected = np.array([tslib.iNaT], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) def test_coerce_outside_ns_bounds_one_valid(self): arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) - result = tslib.array_to_datetime(arr, errors='coerce') + result = tslib.array_to_datetime(arr, errors='coerce')[0] expected = [tslib.iNaT, '2000-01-01T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( @@ -123,11 +128,11 @@ def test_coerce_of_invalid_datetimes(self): # Without coercing, the presence of any invalid dates prevents # any values from being converted - result = tslib.array_to_datetime(arr, errors='ignore') + result = tslib.array_to_datetime(arr, errors='ignore')[0] tm.assert_numpy_array_equal(result, arr) # With coercing, the invalid dates becomes iNaT - result = tslib.array_to_datetime(arr, errors='coerce') + result = tslib.array_to_datetime(arr, errors='coerce')[0] expected = ['2013-01-01T00:00:00.000000000-0000', tslib.iNaT, tslib.iNaT] From 19171488519d149198ff266104e304cfe3d5a549 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 8 Jul 2018 16:16:13 -0500 Subject: [PATCH 04/29] Add more tests --- pandas/_libs/tslib.pyx | 3 ++- pandas/core/tools/datetimes.py | 2 ++ pandas/tests/indexes/datetimes/test_tools.py | 2 +- pandas/tests/tslibs/test_array_to_datetime.py | 11 +++++++++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ca40315b3df9b..7694fe3611091 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -322,7 +322,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): if unit == 'ns': if issubclass(values.dtype.type, np.integer): return values.astype('M8[ns]') - return array_to_datetime(values.astype(object), errors=errors) + return array_to_datetime(values.astype(object), errors=errors)[0] m = cast_from_unit(None, unit) @@ -686,6 +686,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if not is_same_offsets: raise TypeError else: + # Open question: should this return dateutil offset or pytz offset? tz_out = pytz.FixedOffset(out_tzoffset_values[0]) return result, tz_out diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 5ad66907ec973..33057a21ac7ef 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -274,6 +274,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, yearfirst=yearfirst, require_iso8601=require_iso8601 ) + if tz_parsed is not None: + return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index fa9f9fc90387a..bc5541e3252ea 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1031,7 +1031,7 @@ def test_string_na_nat_conversion(self, cache): else: expected[i] = parse_date(val) - result = tslib.array_to_datetime(strings) + result = tslib.array_to_datetime(strings)[0] tm.assert_almost_equal(result, expected) result2 = to_datetime(strings, cache=cache) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index c277d8652b40e..88efa82a5609c 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -4,6 +4,7 @@ import numpy as np import pytest import pytz +from dateutil.tz.tz import tzoffset from pandas._libs import tslib from pandas.compat.numpy import np_array_datetime64_compat @@ -87,6 +88,16 @@ def test_parsing_timezone_offsets(self, dt_string, expected_tz): tm.assert_numpy_array_equal(result, expected) assert result_tz is expected_tz + def test_parsing_different_timezone_offsets(self): + #GH 17697 + data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] + result, result_tz = tslib.array_to_datetime(np.array(data, dtype=object)) + expected = np.array([datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), + datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 23400))], + dtype=object) + tm.assert_numpy_array_equal(result, expected) + assert result_tz is None + def test_number_looking_strings_not_into_datetime(self): # GH#4601 # These strings don't look like datetimes so they shouldn't be From 581a33eafcef551c9ec8aa25096f65132f46723f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 8 Jul 2018 16:38:46 -0500 Subject: [PATCH 05/29] Adjust test --- pandas/_libs/tslib.pyx | 3 ++- pandas/tests/indexes/datetimes/test_tools.py | 25 ++++++++------------ 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 7694fe3611091..7f14975a8618c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -457,6 +457,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError is encountered Also returns a pytz.FixedOffset if an array of strings with the same timezone offset if passed + and utc=True is not passed Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric, strings @@ -674,7 +675,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', else: raise TypeError - if seen_datetime_offset: + if seen_datetime_offset and not utc_convert: # GH 17697 # 1) If all the offsets are equal, then return one pytz.FixedOffset for the # parsed dates so it can behave nicely with DatetimeIndex diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index bc5541e3252ea..e1ea0f653b923 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1495,23 +1495,18 @@ def test_parsers_time(self): assert res == expected_arr @pytest.mark.parametrize('cache', [True, False]) - def test_parsers_timezone_minute_offsets_roundtrip(self, cache): + @pytest.mark.parametrize('dt_string, tz, dt_string_repr', [ + ('2013-01-01 05:45+0545', pytz.FixedOffset(345), + "Timestamp('2013-01-01 05:45:00+0545', tz='pytz.FixedOffset(345)')"), + ('2013-01-01 05:30+0530', pytz.FixedOffset(330), + "Timestamp('2013-01-01 05:30:00+0530', tz='pytz.FixedOffset(330)')")]) + def test_parsers_timezone_minute_offsets_roundtrip(self, cache, dt_string, tz, dt_string_repr): # GH11708 base = to_datetime("2013-01-01 00:00:00", cache=cache) - dt_strings = [ - ('2013-01-01 05:45+0545', - "Asia/Katmandu", - "Timestamp('2013-01-01 05:45:00+0545', tz='Asia/Katmandu')"), - ('2013-01-01 05:30+0530', - "Asia/Kolkata", - "Timestamp('2013-01-01 05:30:00+0530', tz='Asia/Kolkata')") - ] - - for dt_string, tz, dt_string_repr in dt_strings: - dt_time = to_datetime(dt_string, cache=cache) - assert base == dt_time - converted_time = dt_time.tz_localize('UTC').tz_convert(tz) - assert dt_string_repr == repr(converted_time) + base = base.tz_localize('UTC').tz_convert(tz) + dt_time = to_datetime(dt_string, cache=cache) + assert base == dt_time + assert dt_string_repr == repr(dt_time) @pytest.fixture(params=['D', 's', 'ms', 'us', 'ns']) From a1bc8f916947faca3d5b08d1d2cd0b2aaae04f57 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 8 Jul 2018 16:50:24 -0500 Subject: [PATCH 06/29] Flake8 --- pandas/core/tools/datetimes.py | 3 ++- pandas/tests/indexes/datetimes/test_tools.py | 3 ++- pandas/tests/tslibs/test_array_to_datetime.py | 16 ++++++++++------ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 33057a21ac7ef..adfdbb7611105 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -275,7 +275,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, require_iso8601=require_iso8601 ) if tz_parsed is not None: - return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) + return DatetimeIndex._simple_new(result, name=name, + tz=tz_parsed) if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz=tz, name=name) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index e1ea0f653b923..9505818977bad 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1500,7 +1500,8 @@ def test_parsers_time(self): "Timestamp('2013-01-01 05:45:00+0545', tz='pytz.FixedOffset(345)')"), ('2013-01-01 05:30+0530', pytz.FixedOffset(330), "Timestamp('2013-01-01 05:30:00+0530', tz='pytz.FixedOffset(330)')")]) - def test_parsers_timezone_minute_offsets_roundtrip(self, cache, dt_string, tz, dt_string_repr): + def test_parsers_timezone_minute_offsets_roundtrip(self, cache, dt_string, + tz, dt_string_repr): # GH11708 base = to_datetime("2013-01-01 00:00:00", cache=cache) base = base.tz_localize('UTC').tz_convert(tz) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 88efa82a5609c..a605eea6abeb5 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -78,8 +78,9 @@ def test_parsing_timezone_offsets(self, dt_string, expected_tz): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added - # TODO: Appears that the dateparser doesnt return offset info if string is non-ISO - # maybe something in the np_datetime_strings parser is not catching this? + # TODO: Appears that parsing non-ISO strings adjust the date to UTC + # but don't return the offset. Not sure if this is the intended + # behavior of non-iso strings in np_datetime_strings arr = np.array(['01-01-2013 00:00:00'], dtype=object) expected = tslib.array_to_datetime(arr)[0] @@ -89,11 +90,14 @@ def test_parsing_timezone_offsets(self, dt_string, expected_tz): assert result_tz is expected_tz def test_parsing_different_timezone_offsets(self): - #GH 17697 + # GH 17697 data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] - result, result_tz = tslib.array_to_datetime(np.array(data, dtype=object)) - expected = np.array([datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), - datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 23400))], + data = np.array(data, dtype=object) + result, result_tz = tslib.array_to_datetime(data) + expected = np.array([datetime(2015, 11, 18, 15, 30, + tzinfo=tzoffset(None, 19800)), + datetime(2015, 11, 18, 15, 30, + tzinfo=tzoffset(None, 23400))], dtype=object) tm.assert_numpy_array_equal(result, expected) assert result_tz is None From 80042e61d6c80260caa430c6c5fc00994a1ec121 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 8 Jul 2018 17:18:01 -0500 Subject: [PATCH 07/29] Add tests confirming new behavior --- pandas/tests/indexes/datetimes/test_tools.py | 31 ++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 9505818977bad..9f23551e43b7f 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -7,6 +7,7 @@ import dateutil import numpy as np from dateutil.parser import parse +from dateutil.tz.tz import tzoffset from datetime import datetime, time from distutils.version import LooseVersion @@ -577,6 +578,36 @@ def test_week_without_day_and_calendar_year(self, date, format): with tm.assert_raises_regex(ValueError, msg): pd.to_datetime(date, format=format) + def test_ts_strings_with_same_offset(self): + # GH 17697, 11736 + ts_str = "2015-11-18 15:30:00+05:30" + result = to_datetime(ts_str) + expected = Timestamp(ts_str) + assert result == expected + + expected = DatetimeIndex([Timestamp(ts_str)] * 2) + result = to_datetime([ts_str] * 2) + tm.assert_index_equal(result, expected) + + result = DatetimeIndex([ts_str] * 2) + tm.assert_index_equal(result, expected) + + def test_ts_strings_with_different_offsets(self): + # GH 17697, 11736 + ts_strings = ["2015-11-18 15:30:00+05:30", + "2015-11-18 16:30:00+06:30"] + result = to_datetime(ts_strings) + expected = np.array([datetime(2015, 11, 18, 15, 30, + tzinfo=tzoffset(None, 19800)), + datetime(2015, 11, 18, 16, 30, + tzinfo=tzoffset(None, 23400))], + dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = to_datetime(ts_strings, utc=True) + expected = DatetimeIndex([Timestamp(2015, 11, 18, 10)] * 2, tz='UTC') + tm.assert_index_equal(result, expected) + class TestToDatetimeUnit(object): @pytest.mark.parametrize('cache', [True, False]) From bacb6e3ff65c9060fb4eebec89d3d38dfa8c46cb Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 10 Jul 2018 21:32:23 -0700 Subject: [PATCH 08/29] Lint --- pandas/_libs/tslib.pyx | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9409fa2eedf9f..b00d58d99c64d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -452,16 +452,18 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', """ Converts a 1D array of date-like values to a numpy array of either: 1) datetime64[ns] data - 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError is encountered + 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError + is encountered - Also returns a pytz.FixedOffset if an array of strings with the same timezone offset if passed - and utc=True is not passed + Also returns a pytz.FixedOffset if an array of strings with the same + timezone offset if passed and utc=True is not passed - Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric, strings + Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric, + strings Returns ------- - (ndarray, timezone offset) + (ndarray, timezone offset) """ cdef: Py_ssize_t i, n = len(values) @@ -479,17 +481,19 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', bint is_coerce = errors=='coerce' _TSObject _ts int out_local=0, out_tzoffset=0 - # Can't directly create a ndarray[int] out_local, since most np.array constructors expect - # a long dtype, while _string_to_dts expectes purely int, maybe something I am missing? + # Can't directly create a ndarray[int] out_local, + # since most np.array constructors expect a long dtype + # while _string_to_dts expects purely int + # maybe something I am missing? ndarray[int64_t] out_local_values - ndarray[int64_t] out_tzoffset_values + ndarray[int64_t] out_tzoffset_vals # specify error conditions assert is_raise or is_ignore or is_coerce try: out_local_values = np.empty(n, dtype=np.int64) - out_tzoffset_values = np.empty(n, dtype=np.int64) + out_tzoffset_vals = np.empty(n, dtype=np.int64) result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') for i in range(n): @@ -625,7 +629,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', else: # No error raised by string_to_dts, pick back up # where we left off - out_tzoffset_values[i] = out_tzoffset + out_tzoffset_vals[i] = out_tzoffset out_local_values[i] = out_local value = dtstruct_to_dt64(&dts) if out_local == 1: @@ -675,18 +679,18 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if seen_datetime_offset and not utc_convert: # GH 17697 - # 1) If all the offsets are equal, then return one pytz.FixedOffset for the - # parsed dates so it can behave nicely with DatetimeIndex - # 2) If the offsets are different, then force the parsing down the object path - # where an array of datetimes (with individual datutil.tzoffsets) are returned + # 1) If all the offsets are equal, return one pytz.FixedOffset for + # the parsed dates to (maybe) pass to DatetimeIndex + # 2) If the offsets are different, then force the parsing down the + # object path where an array of datetimes + # (with individual datutil.tzoffsets) are returned # Faster to compare integers than to compare objects - is_same_offsets = (out_tzoffset_values[0] == out_tzoffset_values).all() + is_same_offsets = (out_tzoffset_vals[0] == out_tzoffset_vals).all() if not is_same_offsets: raise TypeError else: - # Open question: should this return dateutil offset or pytz offset? - tz_out = pytz.FixedOffset(out_tzoffset_values[0]) + tz_out = pytz.FixedOffset(out_tzoffset_vals[0]) return result, tz_out except OutOfBoundsDatetime: From a2f4aad59dc663ddceba965a831538dbf6af0969 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 10 Jul 2018 22:04:31 -0700 Subject: [PATCH 09/29] adjust a test --- pandas/tests/frame/test_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 3ad25ae73109e..5705d5474a65d 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -154,7 +154,7 @@ def test_to_csv_from_csv5(self): self.tzframe.to_csv(path) result = pd.read_csv(path, index_col=0, parse_dates=['A']) - converter = lambda c: to_datetime(result[c]).dt.tz_localize( + converter = lambda c: to_datetime(result[c]).dt.tz_convert( 'UTC').dt.tz_convert(self.tzframe[c].dt.tz) result['B'] = converter('B') result['C'] = converter('C') From d48f34141300494c4ee24f9790ff22bcbc48e104 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 10 Jul 2018 23:04:06 -0700 Subject: [PATCH 10/29] Ensure box object index, pass tests --- pandas/core/tools/datetimes.py | 14 +++++++++----- pandas/tests/frame/test_to_csv.py | 11 +++++------ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 42f8ad55b14b1..02622ed1ded9f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -23,7 +23,8 @@ is_float, is_list_like, is_scalar, - is_numeric_dtype) + is_numeric_dtype, + is_object_dtype) from pandas.core.dtypes.generic import ( ABCIndexClass, ABCSeries, ABCDataFrame) @@ -178,7 +179,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, - Index-like if box=True - ndarray of Timestamps if box=False """ - from pandas import DatetimeIndex + from pandas import Index, DatetimeIndex if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -278,8 +279,11 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) - if is_datetime64_dtype(result) and box: - result = DatetimeIndex(result, tz=tz, name=name) + if box: + if is_datetime64_dtype(result): + return DatetimeIndex(result, tz=tz, name=name) + elif is_object_dtype(result): + return Index(result, name=name) return result except ValueError as e: @@ -407,7 +411,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, datetime.datetime objects as well). box : boolean, default True - - If True returns a DatetimeIndex + - If True returns a DatetimeIndex or Index - If False returns ndarray of values. format : string, default None strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 5705d5474a65d..9e3b606f31973 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -1027,12 +1027,11 @@ def test_to_csv_with_dst_transitions(self): time_range = np.array(range(len(i)), dtype='int64') df = DataFrame({'A': time_range}, index=i) df.to_csv(path, index=True) - # we have to reconvert the index as we # don't parse the tz's result = read_csv(path, index_col=0) - result.index = to_datetime(result.index).tz_localize( - 'UTC').tz_convert('Europe/London') + result.index = to_datetime(result.index, utc=True).tz_convert( + 'Europe/London') assert_frame_equal(result, df) # GH11619 @@ -1043,9 +1042,9 @@ def test_to_csv_with_dst_transitions(self): with ensure_clean('csv_date_format_with_dst') as path: df.to_csv(path, index=True) result = read_csv(path, index_col=0) - result.index = to_datetime(result.index).tz_localize( - 'UTC').tz_convert('Europe/Paris') - result['idx'] = to_datetime(result['idx']).astype( + result.index = to_datetime(result.index, utc=True).tz_convert( + 'Europe/Paris') + result['idx'] = to_datetime(result['idx'], utc=True).astype( 'datetime64[ns, Europe/Paris]') assert_frame_equal(result, df) From 7efb25c1d8da6089ba90c5a443c0fc76467a2eb0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 11 Jul 2018 12:54:36 -0700 Subject: [PATCH 11/29] Adjust tests --- pandas/core/tools/datetimes.py | 5 ++-- .../tests/indexes/datetimes/test_timezones.py | 4 +-- pandas/tests/indexes/datetimes/test_tools.py | 29 +++++++++++++------ pandas/tests/reshape/test_concat.py | 4 +-- pandas/tests/test_algos.py | 3 +- pandas/tests/test_base.py | 13 ++++----- pandas/tests/test_resample.py | 4 +-- 7 files changed, 37 insertions(+), 25 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 02622ed1ded9f..248c7e1614af3 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -179,7 +179,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, - Index-like if box=True - ndarray of Timestamps if box=False """ - from pandas import Index, DatetimeIndex + from pandas import DatetimeIndex if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -275,7 +275,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, yearfirst=yearfirst, require_iso8601=require_iso8601 ) - if tz_parsed is not None: + if tz_parsed is not None and box: return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) @@ -283,6 +283,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, if is_datetime64_dtype(result): return DatetimeIndex(result, tz=tz, name=name) elif is_object_dtype(result): + from pandas import Index return Index(result, name=name) return result diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 3697d183d2fc6..67eb81336f648 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -317,8 +317,8 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): result = index.tz_localize(tz=tz, errors='coerce') test_times = ['2015-03-08 01:00-05:00', 'NaT', '2015-03-08 03:00-04:00'] - dti = DatetimeIndex(test_times) - expected = dti.tz_localize('UTC').tz_convert('US/Eastern') + dti = to_datetime(test_times, utc=True) + expected = dti.tz_convert('US/Eastern') tm.assert_index_equal(result, expected) @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 9f23551e43b7f..1156146ef328e 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -485,7 +485,9 @@ def test_to_datetime_tz_psycopg2(self, cache): # dtype coercion i = pd.DatetimeIndex([ '2000-01-01 08:00:00+00:00' - ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) + ]) + i = i.tz_convert(psycopg2.tz.FixedOffsetTimezone(offset=-300, + name=None)) assert is_datetime64_ns_dtype(i) # tz coerceion @@ -602,7 +604,9 @@ def test_ts_strings_with_different_offsets(self): datetime(2015, 11, 18, 16, 30, tzinfo=tzoffset(None, 23400))], dtype=object) - tm.assert_numpy_array_equal(result, expected) + # GH 21864 + expected = Index(expected) + tm.assert_index_equal(result, expected) result = to_datetime(ts_strings, utc=True) expected = DatetimeIndex([Timestamp(2015, 11, 18, 10)] * 2, tz='UTC') @@ -1009,14 +1013,19 @@ def test_to_datetime_types(self, cache): # assert result == expected @pytest.mark.parametrize('cache', [True, False]) - def test_to_datetime_unprocessable_input(self, cache): + @pytest.mark.parametrize('box, klass, assert_method', [ + [True, Index, 'assert_index_equal'], + [False, np.array, 'assert_numpy_array_equal'] + ]) + def test_to_datetime_unprocessable_input(self, cache, box, klass, + assert_method): # GH 4928 - tm.assert_numpy_array_equal( - to_datetime([1, '1'], errors='ignore', cache=cache), - np.array([1, '1'], dtype='O') - ) + # GH 21864 + result = to_datetime([1, '1'], errors='ignore', cache=cache, box=box) + expected = klass(np.array([1, '1'], dtype='O')) + getattr(tm, assert_method)(result, expected) pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise', - cache=cache) + cache=cache, box=box) def test_to_datetime_other_datetime64_units(self): # 5/25/2012 @@ -1077,7 +1086,9 @@ def test_string_na_nat_conversion(self, cache): cache=cache)) result = to_datetime(malformed, errors='ignore', cache=cache) - tm.assert_numpy_array_equal(result, malformed) + # GH 21864 + expected = Index(malformed) + tm.assert_index_equal(result, expected) pytest.raises(ValueError, to_datetime, malformed, errors='raise', cache=cache) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index d05fd689ed754..5750b51cd6edc 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2325,7 +2325,7 @@ def test_concat_datetime_timezone(self): '2011-01-01 01:00:00+01:00', '2011-01-01 02:00:00+01:00'], freq='H' - ).tz_localize('UTC').tz_convert('Europe/Paris') + ).tz_convert('UTC').tz_convert('Europe/Paris') expected = pd.DataFrame([[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=['a', 'b']) @@ -2343,7 +2343,7 @@ def test_concat_datetime_timezone(self): '2010-12-31 23:00:00+00:00', '2011-01-01 00:00:00+00:00', '2011-01-01 01:00:00+00:00'] - ).tz_localize('UTC') + ) expected = pd.DataFrame([[np.nan, 1], [np.nan, 2], [np.nan, 3], [1, np.nan], [2, np.nan], [3, np.nan]], diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 25e64aa82cc36..128337455f273 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -331,7 +331,8 @@ def test_datetime64_dtype_array_returned(self): dt_index = pd.to_datetime(['2015-01-03T00:00:00.000000000+0000', '2015-01-01T00:00:00.000000000+0000', - '2015-01-01T00:00:00.000000000+0000']) + '2015-01-01T00:00:00.000000000+0000'], + box=False) result = algos.unique(dt_index) tm.assert_numpy_array_equal(result, expected) assert result.dtype == expected.dtype diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index a5d83c1c26948..579af4afd6a88 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -668,16 +668,15 @@ def test_value_counts_datetime64(self, klass): s = klass(df['dt'].copy()) s.name = None - - idx = pd.to_datetime(['2010-01-01 00:00:00Z', - '2008-09-09 00:00:00Z', - '2009-01-01 00:00:00Z']) + idx = pd.to_datetime(['2010-01-01 00:00:00', + '2008-09-09 00:00:00', + '2009-01-01 00:00:00']) expected_s = Series([3, 2, 1], index=idx) tm.assert_series_equal(s.value_counts(), expected_s) - expected = np_array_datetime64_compat(['2010-01-01 00:00:00Z', - '2009-01-01 00:00:00Z', - '2008-09-09 00:00:00Z'], + expected = np_array_datetime64_compat(['2010-01-01 00:00:00', + '2009-01-01 00:00:00', + '2008-09-09 00:00:00'], dtype='datetime64[ns]') if isinstance(s, Index): tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 60f23309b11d9..1e6c6ba5578a2 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2677,8 +2677,8 @@ def test_resample_with_dst_time_change(self): '2016-03-14 13:00:00-05:00', '2016-03-15 01:00:00-05:00', '2016-03-15 13:00:00-05:00'] - index = pd.DatetimeIndex(expected_index_values, - tz='UTC').tz_convert('America/Chicago') + index = pd.to_datetime(expected_index_values, utc=True).tz_convert( + 'America/Chicago') expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], index=index) From 1d527ff36385b578a94a9e01c174e99cc97c5f81 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 11 Jul 2018 16:39:22 -0700 Subject: [PATCH 12/29] Adjust test --- pandas/tests/indexes/datetimes/test_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 1156146ef328e..fb07073986149 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -495,7 +495,7 @@ def test_to_datetime_tz_psycopg2(self, cache): tm.assert_index_equal(result, i) result = pd.to_datetime(i, errors='coerce', utc=True, cache=cache) - expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], + expected = pd.DatetimeIndex(['2000-01-01 08:00:00'], dtype='datetime64[ns, UTC]') tm.assert_index_equal(result, expected) From f89d6b6b02a0e750124430720bcab1509cf05bd5 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 11 Jul 2018 17:00:26 -0700 Subject: [PATCH 13/29] Cleanup and add comments --- pandas/_libs/tslib.pyx | 16 +++++----------- pandas/core/tools/datetimes.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 4 ++-- pandas/tests/tslibs/test_array_to_datetime.py | 5 ++--- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b00d58d99c64d..45ce108776642 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -456,14 +456,15 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', is encountered Also returns a pytz.FixedOffset if an array of strings with the same - timezone offset if passed and utc=True is not passed + timezone offset is passed and utc=True is not passed. Otherwise, None + is returned Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric, strings Returns ------- - (ndarray, timezone offset) + tuple (ndarray, timezone offset) """ cdef: Py_ssize_t i, n = len(values) @@ -481,18 +482,12 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', bint is_coerce = errors=='coerce' _TSObject _ts int out_local=0, out_tzoffset=0 - # Can't directly create a ndarray[int] out_local, - # since most np.array constructors expect a long dtype - # while _string_to_dts expects purely int - # maybe something I am missing? - ndarray[int64_t] out_local_values ndarray[int64_t] out_tzoffset_vals # specify error conditions assert is_raise or is_ignore or is_coerce try: - out_local_values = np.empty(n, dtype=np.int64) out_tzoffset_vals = np.empty(n, dtype=np.int64) result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') @@ -630,7 +625,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # No error raised by string_to_dts, pick back up # where we left off out_tzoffset_vals[i] = out_tzoffset - out_local_values[i] = out_local value = dtstruct_to_dt64(&dts) if out_local == 1: seen_datetime_offset = 1 @@ -685,12 +679,12 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # object path where an array of datetimes # (with individual datutil.tzoffsets) are returned - # Faster to compare integers than to compare objects + # Faster to compare integers than to compare pytz objects is_same_offsets = (out_tzoffset_vals[0] == out_tzoffset_vals).all() if not is_same_offsets: raise TypeError else: - tz_out = pytz.FixedOffset(out_tzoffset_vals[0]) + tz_out = pytz.FixedOffset(out_tzoffset) return result, tz_out except OutOfBoundsDatetime: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 248c7e1614af3..b6aeca1a9eb7b 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -412,7 +412,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, datetime.datetime objects as well). box : boolean, default True - - If True returns a DatetimeIndex or Index + - If True returns a DatetimeIndex or Index-like object - If False returns ndarray of values. format : string, default None strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index fb07073986149..f9480f9511422 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -580,7 +580,7 @@ def test_week_without_day_and_calendar_year(self, date, format): with tm.assert_raises_regex(ValueError, msg): pd.to_datetime(date, format=format) - def test_ts_strings_with_same_offset(self): + def test_iso_8601_strings_with_same_offset(self): # GH 17697, 11736 ts_str = "2015-11-18 15:30:00+05:30" result = to_datetime(ts_str) @@ -594,7 +594,7 @@ def test_ts_strings_with_same_offset(self): result = DatetimeIndex([ts_str] * 2) tm.assert_index_equal(result, expected) - def test_ts_strings_with_different_offsets(self): + def test_iso_8601_strings_with_different_offsets(self): # GH 17697, 11736 ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"] diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index a605eea6abeb5..dd0609ea22885 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -78,9 +78,8 @@ def test_parsing_timezone_offsets(self, dt_string, expected_tz): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added - # TODO: Appears that parsing non-ISO strings adjust the date to UTC - # but don't return the offset. Not sure if this is the intended - # behavior of non-iso strings in np_datetime_strings + # Non-ISO 8601 datetime strings will not return a timezone offset + # as a limitation of the C parser arr = np.array(['01-01-2013 00:00:00'], dtype=object) expected = tslib.array_to_datetime(arr)[0] From d91c63f80d5104dae9f57c1fa8dff364e82d9910 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 11 Jul 2018 21:29:03 -0700 Subject: [PATCH 14/29] address comments --- pandas/_libs/tslib.pyx | 68 +++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 45ce108776642..c4a382bfba78b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -464,7 +464,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', Returns ------- - tuple (ndarray, timezone offset) + tuple (ndarray, tzoffset) """ cdef: Py_ssize_t i, n = len(values) @@ -669,7 +669,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise ValueError( "mixed datetimes and integers in passed array") else: - raise TypeError + result, tz_out = array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) if seen_datetime_offset and not utc_convert: # GH 17697 @@ -677,12 +678,13 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # the parsed dates to (maybe) pass to DatetimeIndex # 2) If the offsets are different, then force the parsing down the # object path where an array of datetimes - # (with individual datutil.tzoffsets) are returned + # (with individual dateutil.tzoffsets) are returned # Faster to compare integers than to compare pytz objects is_same_offsets = (out_tzoffset_vals[0] == out_tzoffset_vals).all() if not is_same_offsets: - raise TypeError + result, tz_out = array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) else: tz_out = pytz.FixedOffset(out_tzoffset) @@ -709,34 +711,46 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', else: oresult[i] = val return oresult, tz_out - except TypeError: - oresult = np.empty(n, dtype=object) - for i in range(n): - val = values[i] - if checknull_with_nat(val): - oresult[i] = val - elif is_string_object(val): - if len(val) == 0 or val in nat_strings: - oresult[i] = 'NaT' - continue +cdef array_to_datetime_object(ndarray[object] values, bint is_raise, + dayfirst=False, yearfirst=False): + """ + Fall back function for array_to_datetime - try: - oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, - yearfirst=yearfirst) - pydatetime_to_dt64(oresult[i], &dts) - check_dts_bounds(&dts) - except Exception: - if is_raise: - raise - return values, tz_out - else: + Attempts to parse datetime strings with dateutil to return an array + of datetime objects + """ + cdef: + Py_ssize_t i, n = len(values) + object val, + ndarray[object] oresult + pandas_datetimestruct dts + + oresult = np.empty(n, dtype=object) + + for i in range(n): + val = values[i] + if checknull_with_nat(val): + oresult[i] = val + elif is_string_object(val): + if len(val) == 0 or val in nat_strings: + oresult[i] = 'NaT' + continue + try: + oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, + yearfirst=yearfirst) + pydatetime_to_dt64(oresult[i], &dts) + check_dts_bounds(&dts) + except Exception: if is_raise: raise - return values, tz_out - - return oresult, tz_out + return values, None + else: + if is_raise: + raise + return values, None + return oresult, None cdef inline bint _parse_today_now(str val, int64_t* iresult): From 1054e8b14853c3bac896e3fb91ac9eca89e6b0ba Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 11 Jul 2018 21:48:04 -0700 Subject: [PATCH 15/29] adjust test to be closer to the original behavior --- pandas/tests/indexes/datetimes/test_tools.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index f9480f9511422..0c1ea556399f3 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -484,10 +484,8 @@ def test_to_datetime_tz_psycopg2(self, cache): # dtype coercion i = pd.DatetimeIndex([ - '2000-01-01 08:00:00+00:00' - ]) - i = i.tz_convert(psycopg2.tz.FixedOffsetTimezone(offset=-300, - name=None)) + '2000-01-01 08:00:00' + ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) assert is_datetime64_ns_dtype(i) # tz coerceion @@ -495,7 +493,7 @@ def test_to_datetime_tz_psycopg2(self, cache): tm.assert_index_equal(result, i) result = pd.to_datetime(i, errors='coerce', utc=True, cache=cache) - expected = pd.DatetimeIndex(['2000-01-01 08:00:00'], + expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], dtype='datetime64[ns, UTC]') tm.assert_index_equal(result, expected) From 7d04613634edecf4e685effc1ca010a3d5789e21 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 11 Jul 2018 23:27:42 -0700 Subject: [PATCH 16/29] Add TypeError clause --- pandas/_libs/tslib.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c4a382bfba78b..0d43e100cf91b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -669,8 +669,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise ValueError( "mixed datetimes and integers in passed array") else: - result, tz_out = array_to_datetime_object(values, is_raise, - dayfirst, yearfirst) + raise ValueError if seen_datetime_offset and not utc_convert: # GH 17697 @@ -711,6 +710,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', else: oresult[i] = val return oresult, tz_out + except TypeError: + return array_to_datetime(values, is_raise, dayfirst, yearfirst) cdef array_to_datetime_object(ndarray[object] values, bint is_raise, From 031284c80609ba54cb91323411dadb9b6f9c9b0b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 11 Jul 2018 23:29:06 -0700 Subject: [PATCH 17/29] Add TypeError not ValueError --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 0d43e100cf91b..e78a76b8a190f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -669,7 +669,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise ValueError( "mixed datetimes and integers in passed array") else: - raise ValueError + raise TypeError if seen_datetime_offset and not utc_convert: # GH 17697 From 23cbf7532266af6f0343386a5d8b563dad9450bf Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 12 Jul 2018 16:23:33 -0700 Subject: [PATCH 18/29] fix typo --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e78a76b8a190f..1e35ce5442a96 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -711,7 +711,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', oresult[i] = val return oresult, tz_out except TypeError: - return array_to_datetime(values, is_raise, dayfirst, yearfirst) + return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) cdef array_to_datetime_object(ndarray[object] values, bint is_raise, From c1f51cdd0741ac324a94544f2843244bf8438807 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 19 Jul 2018 09:30:44 -0700 Subject: [PATCH 19/29] New implimentation --- pandas/_libs/tslib.pyx | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e373868dd33d8..79a2f0a13a739 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -481,13 +481,12 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', bint is_coerce = errors=='coerce' _TSObject _ts int out_local=0, out_tzoffset=0 - ndarray[int64_t] out_tzoffset_vals + set out_tzoffset_vals = set() # specify error conditions assert is_raise or is_ignore or is_coerce try: - out_tzoffset_vals = np.empty(n, dtype=np.int64) result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') for i in range(n): @@ -607,6 +606,13 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise TypeError("invalid string coercion to datetime") try: + if py_dt.tzinfo is not None: + seen_datetime_offset = 1 + out_tzoffset_vals.add(py_dt.tzinfo) + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add('naive') _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value except OutOfBoundsDatetime: @@ -623,12 +629,16 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', else: # No error raised by string_to_dts, pick back up # where we left off - out_tzoffset_vals[i] = out_tzoffset value = dtstruct_to_dt64(&dts) if out_local == 1: seen_datetime_offset = 1 tz = pytz.FixedOffset(out_tzoffset) + out_tzoffset_vals.add(tz) value = tz_convert_single(value, tz, 'UTC') + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add('naive') iresult[i] = value try: check_dts_bounds(&dts) @@ -672,19 +682,18 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if seen_datetime_offset and not utc_convert: # GH 17697 - # 1) If all the offsets are equal, return one pytz.FixedOffset for + # 1) If all the offsets are equal, return one offset for # the parsed dates to (maybe) pass to DatetimeIndex # 2) If the offsets are different, then force the parsing down the # object path where an array of datetimes # (with individual dateutil.tzoffsets) are returned - # Faster to compare integers than to compare pytz objects - is_same_offsets = (out_tzoffset_vals[0] == out_tzoffset_vals).all() + is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: result, tz_out = array_to_datetime_object(values, is_raise, dayfirst, yearfirst) else: - tz_out = pytz.FixedOffset(out_tzoffset) + tz_out = out_tzoffset_vals[0] return result, tz_out except OutOfBoundsDatetime: @@ -725,7 +734,7 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise, Py_ssize_t i, n = len(values) object val, ndarray[object] oresult - pandas_datetimestruct dts + npy_datetimestruct dts oresult = np.empty(n, dtype=object) From 4733ac5cee6738f9e43266e1fbe0b79e2e7334e6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 19 Jul 2018 23:45:01 -0700 Subject: [PATCH 20/29] Change implimentation and add some tests --- pandas/_libs/tslib.pyx | 3 +-- pandas/tests/indexes/datetimes/test_tools.py | 17 +++++++++++++--- pandas/tests/tslibs/test_array_to_datetime.py | 20 ++++++++++++------- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 79a2f0a13a739..bf50e2f620bde 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -693,8 +693,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', result, tz_out = array_to_datetime_object(values, is_raise, dayfirst, yearfirst) else: - tz_out = out_tzoffset_vals[0] - + tz_out = out_tzoffset_vals.pop() return result, tz_out except OutOfBoundsDatetime: if is_raise: diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 0c1ea556399f3..ee942b39f42a2 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -595,19 +595,30 @@ def test_iso_8601_strings_with_same_offset(self): def test_iso_8601_strings_with_different_offsets(self): # GH 17697, 11736 ts_strings = ["2015-11-18 15:30:00+05:30", - "2015-11-18 16:30:00+06:30"] + "2015-11-18 16:30:00+06:30", + NaT] result = to_datetime(ts_strings) expected = np.array([datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), datetime(2015, 11, 18, 16, 30, - tzinfo=tzoffset(None, 23400))], + tzinfo=tzoffset(None, 23400)), + NaT], dtype=object) # GH 21864 expected = Index(expected) tm.assert_index_equal(result, expected) result = to_datetime(ts_strings, utc=True) - expected = DatetimeIndex([Timestamp(2015, 11, 18, 10)] * 2, tz='UTC') + expected = DatetimeIndex([Timestamp(2015, 11, 18, 10), + Timestamp(2015, 11, 18, 10), + NaT], tz='UTC') + tm.assert_index_equal(result, expected) + + def test_non_iso_strings_with_tz_offset(self): + # We should get back a dateutil.tzoffset tz dtype here + result = to_datetime(['March 1, 2018 12:00:00+0400'] * 2) + expected = DatetimeIndex([datetime(2018, 3, 1, 12, + tzinfo=tzoffset(None, 14400))] * 2) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index dd0609ea22885..71a9b6683427f 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -70,16 +70,11 @@ def test_parsing_valid_dates(self): np_array_datetime64_compat(expected, dtype='M8[ns]')) @pytest.mark.parametrize('dt_string, expected_tz', [ - ['01-01-2013 08:00:00+08:00', None], ['2013-01-01T08:00:00.000000000+0800', pytz.FixedOffset(480)], - ['2012-12-31T16:00:00.000000000-0800', pytz.FixedOffset(-480)], - ['12-31-2012 23:00:00-01:00', None]]) - def test_parsing_timezone_offsets(self, dt_string, expected_tz): + ['2012-12-31T16:00:00.000000000-0800', pytz.FixedOffset(-480)]]) + def test_parsing_iso_timezone_offsets(self, dt_string, expected_tz): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added - - # Non-ISO 8601 datetime strings will not return a timezone offset - # as a limitation of the C parser arr = np.array(['01-01-2013 00:00:00'], dtype=object) expected = tslib.array_to_datetime(arr)[0] @@ -88,6 +83,17 @@ def test_parsing_timezone_offsets(self, dt_string, expected_tz): tm.assert_numpy_array_equal(result, expected) assert result_tz is expected_tz + def test_parse_non_iso_timezone_offsets(self): + # Non-ISO 8601 datetime strings will not return a timezone offset + # as a limitation of the C parser. tzinfo will be populated though + arr = np.array(['01-01-2013 08:00:00+08:00'], dtype=object) + result, result_tz = tslib.array_to_datetime(arr) + expected = np.array([datetime(2013, 1, 1, 8, 0, + tzinfo=tzoffset(None, 28800))], + dtype=object) + tm.assert_numpy_array_equal(result, expected) + assert result_tz is None + def test_parsing_different_timezone_offsets(self): # GH 17697 data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] From 2fa681f2bdea3b0048dfe42c5205cbaccd850979 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 20 Jul 2018 15:05:34 -0700 Subject: [PATCH 21/29] Add missing commas --- pandas/tests/indexes/datetimes/test_arithmetic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 4ce2b1dd4fd86..1e54e6563d598 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -423,13 +423,13 @@ def test_dti_shift_tzaware(self, tz_naive_fixture): tm.assert_index_equal(idx.shift(0, freq='H'), idx) tm.assert_index_equal(idx.shift(3, freq='H'), idx) - idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00' + idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00'], name='xxx', tz=tz) tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00' + exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00', '2011-01-01 15:00'], name='xxx', tz=tz) tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00' + exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00', '2011-01-01 09:00'], name='xxx', tz=tz) tm.assert_index_equal(idx.shift(-3, freq='H'), exp) From d7ff275c4e054663a39e1f2bb0fb6be7ffc365e6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 24 Jul 2018 22:23:58 -0700 Subject: [PATCH 22/29] Change implimentation since tzoffsets cannot be hashed --- pandas/_libs/tslib.pyx | 24 +++++++++++++++---- pandas/tests/indexes/datetimes/test_tools.py | 3 +-- pandas/tests/tslibs/test_array_to_datetime.py | 19 +++++++-------- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b7e41032794b8..191e962e03ed3 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -19,6 +19,7 @@ import numpy as np cnp.import_array() import pytz +from dateutil.tz import tzutc as dateutil_utc from util cimport (is_integer_object, is_float_object, is_string_object, @@ -490,6 +491,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', bint is_coerce = errors=='coerce' _TSObject _ts int out_local=0, out_tzoffset=0 + float offset_seconds set out_tzoffset_vals = set() # specify error conditions @@ -608,6 +610,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', try: py_dt = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) + tz = py_dt.tzinfo except Exception: if is_coerce: iresult[i] = NPY_NAT @@ -615,9 +618,17 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise TypeError("invalid string coercion to datetime") try: - if py_dt.tzinfo is not None: + if tz is not None: seen_datetime_offset = 1 - out_tzoffset_vals.add(py_dt.tzinfo) + if tz is dateutil_utc(): + # dateutil.tz.tzutc has no _offset attribute + # Just add the 0 offset explicitly + out_tzoffset_vals.add(0) + else: + # dateutil.tz.tzoffset objects cannot be hashed + # store the total_seconds() instead + offset_seconds = tz._offset.total_seconds() + out_tzoffset_vals.add(offset_seconds) else: # Add a marker for naive string, to track if we are # parsing mixed naive and aware strings @@ -641,8 +652,11 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', value = dtstruct_to_dt64(&dts) if out_local == 1: seen_datetime_offset = 1 + # Store the out_tzoffset in seconds + # since we store the total_seconds of + # dateutil.tz.tzoffset objects + out_tzoffset_vals.add(out_tzoffset * 60.) tz = pytz.FixedOffset(out_tzoffset) - out_tzoffset_vals.add(tz) value = tz_convert_single(value, tz, 'UTC') else: # Add a marker for naive string, to track if we are @@ -696,13 +710,13 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # 2) If the offsets are different, then force the parsing down the # object path where an array of datetimes # (with individual dateutil.tzoffsets) are returned - is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: result, tz_out = array_to_datetime_object(values, is_raise, dayfirst, yearfirst) else: - tz_out = out_tzoffset_vals.pop() + tz_offset = out_tzoffset_vals.pop() + tz_out = pytz.FixedOffset(tz_offset / 60.) return result, tz_out except OutOfBoundsDatetime: if is_raise: diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index ee942b39f42a2..72e5358f21966 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -615,10 +615,9 @@ def test_iso_8601_strings_with_different_offsets(self): tm.assert_index_equal(result, expected) def test_non_iso_strings_with_tz_offset(self): - # We should get back a dateutil.tzoffset tz dtype here result = to_datetime(['March 1, 2018 12:00:00+0400'] * 2) expected = DatetimeIndex([datetime(2018, 3, 1, 12, - tzinfo=tzoffset(None, 14400))] * 2) + tzinfo=pytz.FixedOffset(240))] * 2) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 71a9b6683427f..a9f9c7ebff3eb 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -70,9 +70,11 @@ def test_parsing_valid_dates(self): np_array_datetime64_compat(expected, dtype='M8[ns]')) @pytest.mark.parametrize('dt_string, expected_tz', [ + ['01-01-2013 08:00:00+08:00', pytz.FixedOffset(480)], ['2013-01-01T08:00:00.000000000+0800', pytz.FixedOffset(480)], - ['2012-12-31T16:00:00.000000000-0800', pytz.FixedOffset(-480)]]) - def test_parsing_iso_timezone_offsets(self, dt_string, expected_tz): + ['2012-12-31T16:00:00.000000000-0800', pytz.FixedOffset(-480)], + ['12-31-2012 23:00:00-01:00', pytz.FixedOffset(-60)]]) + def test_parsing_timezone_offsets(self, dt_string, expected_tz): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added arr = np.array(['01-01-2013 00:00:00'], dtype=object) @@ -83,16 +85,13 @@ def test_parsing_iso_timezone_offsets(self, dt_string, expected_tz): tm.assert_numpy_array_equal(result, expected) assert result_tz is expected_tz - def test_parse_non_iso_timezone_offsets(self): - # Non-ISO 8601 datetime strings will not return a timezone offset - # as a limitation of the C parser. tzinfo will be populated though - arr = np.array(['01-01-2013 08:00:00+08:00'], dtype=object) + def test_parsing_non_iso_timezone_offset(self): + dt_string = '01-01-2013T00:00:00.000000000+0000' + arr = np.array([dt_string], dtype=object) result, result_tz = tslib.array_to_datetime(arr) - expected = np.array([datetime(2013, 1, 1, 8, 0, - tzinfo=tzoffset(None, 28800))], - dtype=object) + expected = np.array([np.datetime64('2013-01-01 00:00:00.000000000')]) tm.assert_numpy_array_equal(result, expected) - assert result_tz is None + assert result_tz is pytz.FixedOffset(0) def test_parsing_different_timezone_offsets(self): # GH 17697 From 4ff7cb377d886ef8b515c4ca1ceccffde99c4201 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 24 Jul 2018 23:02:40 -0700 Subject: [PATCH 23/29] Add whatsnew --- doc/source/whatsnew/v0.24.0.txt | 57 +++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 137fd5aafe5bd..3bf07b99b4c91 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -224,6 +224,62 @@ For situations where you need an ``ndarray`` of ``Interval`` objects, use np.asarray(idx) idx.values.astype(object) +.. _whatsnew_0240.api.timezone_offset_parsing: + +Parsing Datetime Strings with Timezone Offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Before, parsing datetime strings with UTC offsets with :func:`to_datetime` +or :class:`DatetimeIndex` would automatically convert the datetime to UTC +without timezone localization. This is inconsistent from parsing the same +datetime string with :class:`Timestamp` which would preserve the UTC +offset in the ``tz`` attribute. Now, :func:`to_datetime` preserves the UTC +offset in the ``tz`` attribute when all the datetime strings have the same +UTC offset (:issue:`17697`, :issue:`11736`) + +*Previous Behavior*: + +.. code-block:: ipython + + + In [2]: pd.to_datetime("2015-11-18 15:30:00+05:30") + Out[2]: Timestamp('2015-11-18 10:00:00') + + In [3]: pd.Timestamp("2015-11-18 15:30:00+05:30") + Out[3]: Timestamp('2015-11-18 15:30:00+0530', tz='pytz.FixedOffset(330)') + + # Different UTC offsets would automatically converted the datetimes to UTC (without a UTC timezone) + In [4]: pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"]) + Out[4]: DatetimeIndex(['2015-11-18 10:00:00', '2015-11-18 10:00:00'], dtype='datetime64[ns]', freq=None) + +*Current Behavior*: + +.. ipython:: python + + pd.to_datetime("2015-11-18 15:30:00+05:30") + pd.Timestamp("2015-11-18 15:30:00+05:30") + +Parsing datetime strings with the same UTC offset will preserve the UTC offset in the ``tz`` + +.. ipython:: python + + pd.to_datetime(["2015-11-18 15:30:00+05:30"] * 2) + +Parsing datetime strings with different UTC offsets will now create an Index of +``datetime.datetime`` objects with different UTC offsets + +.. ipython:: python + + idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"]) + idx + idx[0] + idx[1] + +Passing ``utc=True`` will mimic the previous behavior but will correctly indicate +that the dates have been converted to UTC + +.. ipython:: python + idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) .. _whatsnew_0240.api.datetimelike.normalize: @@ -439,6 +495,7 @@ Datetimelike - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) - Fixed bug where :meth:`Timestamp.resolution` incorrectly returned 1-microsecond ``timedelta`` instead of 1-nanosecond :class:`Timedelta` (:issue:`21336`,:issue:`21365`) +- Bug in :func:`to_datetime` that did not consistently return an :class:`Index` when ``box=True`` was specified (:issue:`21864`) Timedelta ^^^^^^^^^ From 8463d9146fc7dc20e93c7ba8120b3ce2c1a0427c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 25 Jul 2018 10:22:30 -0700 Subject: [PATCH 24/29] Address review --- doc/source/whatsnew/v0.24.0.txt | 4 +- pandas/_libs/tslib.pyx | 73 +++++++++++++++++++++++++-------- 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3bf07b99b4c91..b0d2887eec35d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -229,7 +229,7 @@ For situations where you need an ``ndarray`` of ``Interval`` objects, use Parsing Datetime Strings with Timezone Offsets ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Before, parsing datetime strings with UTC offsets with :func:`to_datetime` +Previously, parsing datetime strings with UTC offsets with :func:`to_datetime` or :class:`DatetimeIndex` would automatically convert the datetime to UTC without timezone localization. This is inconsistent from parsing the same datetime string with :class:`Timestamp` which would preserve the UTC @@ -248,7 +248,7 @@ UTC offset (:issue:`17697`, :issue:`11736`) In [3]: pd.Timestamp("2015-11-18 15:30:00+05:30") Out[3]: Timestamp('2015-11-18 15:30:00+0530', tz='pytz.FixedOffset(330)') - # Different UTC offsets would automatically converted the datetimes to UTC (without a UTC timezone) + # Different UTC offsets would automatically convert the datetimes to UTC (without a UTC timezone) In [4]: pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"]) Out[4]: DatetimeIndex(['2015-11-18 10:00:00', '2015-11-18 10:00:00'], dtype='datetime64[ns]', freq=None) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 191e962e03ed3..3d83d639d2a93 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -471,6 +471,23 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric, strings + Parameters + ---------- + values : ndarray of object + date-like objects to convert + errors : str, default 'raise' + error behavior when parsing + dayfirst : bool, default False + dayfirst parsing behavior when encountering datetime strings + yearfirst : bool, default False + yearfirst parsing behavior when encountering datetime strings + format : str, default None + format of the string to parse + utc : bool, default None + indicator whether the dates should be UTC + require_iso8601 : bool, default False + indicator whether the datetime string should be iso8601 + Returns ------- tuple (ndarray, tzoffset) @@ -610,29 +627,31 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', try: py_dt = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) - tz = py_dt.tzinfo except Exception: if is_coerce: iresult[i] = NPY_NAT continue raise TypeError("invalid string coercion to datetime") - try: - if tz is not None: - seen_datetime_offset = 1 - if tz is dateutil_utc(): - # dateutil.tz.tzutc has no _offset attribute - # Just add the 0 offset explicitly - out_tzoffset_vals.add(0) - else: - # dateutil.tz.tzoffset objects cannot be hashed - # store the total_seconds() instead - offset_seconds = tz._offset.total_seconds() - out_tzoffset_vals.add(offset_seconds) + # If the dateutil parser returned tzinfo, capture it + # to check if all arguments have the same tzinfo + tz = py_dt.tzinfo + if tz is not None: + seen_datetime_offset = 1 + if tz is dateutil_utc(): + # dateutil.tz.tzutc has no _offset attribute + # Just add the 0 offset explicitly + out_tzoffset_vals.add(0) else: - # Add a marker for naive string, to track if we are - # parsing mixed naive and aware strings - out_tzoffset_vals.add('naive') + # dateutil.tz.tzoffset objects cannot be hashed + # store the total_seconds() instead + offset_seconds = tz._offset.total_seconds() + out_tzoffset_vals.add(offset_seconds) + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add('naive') + try: _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value except OutOfBoundsDatetime: @@ -712,8 +731,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # (with individual dateutil.tzoffsets) are returned is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: - result, tz_out = array_to_datetime_object(values, is_raise, - dayfirst, yearfirst) + return array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) else: tz_offset = out_tzoffset_vals.pop() tz_out = pytz.FixedOffset(tz_offset / 60.) @@ -751,6 +770,21 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise, Attempts to parse datetime strings with dateutil to return an array of datetime objects + + Parameters + ---------- + values : ndarray of object + date-like objects to convert + is_raise : bool + error behavior when parsing + dayfirst : bool, default False + dayfirst parsing behavior when encountering datetime strings + yearfirst : bool, default False + yearfirst parsing behavior when encountering datetime strings + + Returns + ------- + tuple (ndarray, None) """ cdef: Py_ssize_t i, n = len(values) @@ -760,6 +794,9 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise, oresult = np.empty(n, dtype=object) + # We return an object array and only attempt to parse: + # 1) NaT or NaT-like values + # 2) datetime strings, which we return as datetime.datetime for i in range(n): val = values[i] if checknull_with_nat(val): From dddc6b3a0e56eed59615c72f5870355760f8af42 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 25 Jul 2018 10:34:24 -0700 Subject: [PATCH 25/29] Address tzlocal --- pandas/_libs/tslib.pyx | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3d83d639d2a93..a56fb454144d9 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -19,7 +19,7 @@ import numpy as np cnp.import_array() import pytz -from dateutil.tz import tzutc as dateutil_utc +from dateutil.tz import tzlocal, tzutc as dateutil_utc from util cimport (is_integer_object, is_float_object, is_string_object, @@ -639,9 +639,15 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if tz is not None: seen_datetime_offset = 1 if tz is dateutil_utc(): - # dateutil.tz.tzutc has no _offset attribute + # dateutil.tz.tzutc has no offset-like attribute # Just add the 0 offset explicitly out_tzoffset_vals.add(0) + elif tz == tzlocal(): + # is comparison fails unlike other dateutil.tz + # objects. Also, dateutil.tz.tzlocal has no + # _offset attribute like tzoffset + offset_seconds = tz._dst_offset.total_seconds() + out_tzoffset_vals.add(offset_seconds) else: # dateutil.tz.tzoffset objects cannot be hashed # store the total_seconds() instead From cca3983ecbe82122203a93be43d729cd1a768979 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 25 Jul 2018 17:59:42 -0700 Subject: [PATCH 26/29] Change is to == for older dateutil compat --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a56fb454144d9..db967d44f0401 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -638,7 +638,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', tz = py_dt.tzinfo if tz is not None: seen_datetime_offset = 1 - if tz is dateutil_utc(): + if tz == dateutil_utc(): # dateutil.tz.tzutc has no offset-like attribute # Just add the 0 offset explicitly out_tzoffset_vals.add(0) From a8a65f71c2ac1d10934386e142971175588e7a62 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 25 Jul 2018 22:12:50 -0700 Subject: [PATCH 27/29] Modify example in whatsnew to display --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index c7040194938ad..555ec65e8ec83 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -279,7 +279,7 @@ Passing ``utc=True`` will mimic the previous behavior but will correctly indicat that the dates have been converted to UTC .. ipython:: python - idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) + pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) .. _whatsnew_0240.api.datetimelike.normalize: From 60524754784fb1a0708a67c70de849589a41a495 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 27 Jul 2018 15:02:28 -0700 Subject: [PATCH 28/29] Add more specific errors --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index db967d44f0401..76e3d6e92d31e 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -816,7 +816,7 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise, yearfirst=yearfirst) pydatetime_to_dt64(oresult[i], &dts) check_dts_bounds(&dts) - except Exception: + except (ValueError, OverflowError): if is_raise: raise return values, None From 1cbd9b91d0b4da798f63319a05a8cb740729f6f3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 29 Jul 2018 17:19:52 -0700 Subject: [PATCH 29/29] Add some benchmarks and reformat tests --- asv_bench/benchmarks/timeseries.py | 19 +++++++++++++++++++ pandas/tests/tslibs/test_array_to_datetime.py | 18 +++++++++--------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index eada401d2930b..2c98cc1659519 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -343,6 +343,25 @@ def time_iso8601_tz_spaceformat(self): to_datetime(self.strings_tz_space) +class ToDatetimeNONISO8601(object): + + goal_time = 0.2 + + def setup(self): + N = 10000 + half = int(N / 2) + ts_string_1 = 'March 1, 2018 12:00:00+0400' + ts_string_2 = 'March 1, 2018 12:00:00+0500' + self.same_offset = [ts_string_1] * N + self.diff_offset = [ts_string_1] * half + [ts_string_2] * half + + def time_same_offset(self): + to_datetime(self.same_offset) + + def time_different_offset(self): + to_datetime(self.diff_offset) + + class ToDatetimeFormat(object): goal_time = 0.2 diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index a9f9c7ebff3eb..915687304bfe2 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -54,7 +54,7 @@ def test_parsers_iso8601_invalid(self, date_str): class TestArrayToDatetime(object): def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) - result = tslib.array_to_datetime(arr)[0] + result, _ = tslib.array_to_datetime(arr) expected = ['2013-01-01T00:00:00.000000000-0000', '2013-01-02T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( @@ -62,7 +62,7 @@ def test_parsing_valid_dates(self): np_array_datetime64_compat(expected, dtype='M8[ns]')) arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) - result = tslib.array_to_datetime(arr)[0] + result, _ = tslib.array_to_datetime(arr) expected = ['2013-09-16T00:00:00.000000000-0000', '2013-09-17T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( @@ -78,7 +78,7 @@ def test_parsing_timezone_offsets(self, dt_string, expected_tz): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added arr = np.array(['01-01-2013 00:00:00'], dtype=object) - expected = tslib.array_to_datetime(arr)[0] + expected, _ = tslib.array_to_datetime(arr) arr = np.array([dt_string], dtype=object) result, result_tz = tslib.array_to_datetime(arr) @@ -111,11 +111,11 @@ def test_number_looking_strings_not_into_datetime(self): # These strings don't look like datetimes so they shouldn't be # attempted to be converted arr = np.array(['-352.737091', '183.575577'], dtype=object) - result = tslib.array_to_datetime(arr, errors='ignore')[0] + result, _ = tslib.array_to_datetime(arr, errors='ignore') tm.assert_numpy_array_equal(result, arr) arr = np.array(['1', '2', '3', '4', '5'], dtype=object) - result = tslib.array_to_datetime(arr, errors='ignore')[0] + result, _ = tslib.array_to_datetime(arr, errors='ignore') tm.assert_numpy_array_equal(result, arr) @pytest.mark.parametrize('invalid_date', [ @@ -129,13 +129,13 @@ def test_coerce_outside_ns_bounds(self, invalid_date): with pytest.raises(ValueError): tslib.array_to_datetime(arr, errors='raise') - result = tslib.array_to_datetime(arr, errors='coerce')[0] + result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = np.array([tslib.iNaT], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) def test_coerce_outside_ns_bounds_one_valid(self): arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) - result = tslib.array_to_datetime(arr, errors='coerce')[0] + result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = [tslib.iNaT, '2000-01-01T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( @@ -147,11 +147,11 @@ def test_coerce_of_invalid_datetimes(self): # Without coercing, the presence of any invalid dates prevents # any values from being converted - result = tslib.array_to_datetime(arr, errors='ignore')[0] + result, _ = tslib.array_to_datetime(arr, errors='ignore') tm.assert_numpy_array_equal(result, arr) # With coercing, the invalid dates becomes iNaT - result = tslib.array_to_datetime(arr, errors='coerce')[0] + result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = ['2013-01-01T00:00:00.000000000-0000', tslib.iNaT, tslib.iNaT]