diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index eada401d2930b..2c98cc1659519 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -343,6 +343,25 @@ def time_iso8601_tz_spaceformat(self): to_datetime(self.strings_tz_space) +class ToDatetimeNONISO8601(object): + + goal_time = 0.2 + + def setup(self): + N = 10000 + half = int(N / 2) + ts_string_1 = 'March 1, 2018 12:00:00+0400' + ts_string_2 = 'March 1, 2018 12:00:00+0500' + self.same_offset = [ts_string_1] * N + self.diff_offset = [ts_string_1] * half + [ts_string_2] * half + + def time_same_offset(self): + to_datetime(self.same_offset) + + def time_different_offset(self): + to_datetime(self.diff_offset) + + class ToDatetimeFormat(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 42e286f487a7d..d2d5d40393b62 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -224,6 +224,62 @@ For situations where you need an ``ndarray`` of ``Interval`` objects, use np.asarray(idx) idx.values.astype(object) +.. _whatsnew_0240.api.timezone_offset_parsing: + +Parsing Datetime Strings with Timezone Offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, parsing datetime strings with UTC offsets with :func:`to_datetime` +or :class:`DatetimeIndex` would automatically convert the datetime to UTC +without timezone localization. This is inconsistent from parsing the same +datetime string with :class:`Timestamp` which would preserve the UTC +offset in the ``tz`` attribute. Now, :func:`to_datetime` preserves the UTC +offset in the ``tz`` attribute when all the datetime strings have the same +UTC offset (:issue:`17697`, :issue:`11736`) + +*Previous Behavior*: + +.. code-block:: ipython + + + In [2]: pd.to_datetime("2015-11-18 15:30:00+05:30") + Out[2]: Timestamp('2015-11-18 10:00:00') + + In [3]: pd.Timestamp("2015-11-18 15:30:00+05:30") + Out[3]: Timestamp('2015-11-18 15:30:00+0530', tz='pytz.FixedOffset(330)') + + # Different UTC offsets would automatically convert the datetimes to UTC (without a UTC timezone) + In [4]: pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"]) + Out[4]: DatetimeIndex(['2015-11-18 10:00:00', '2015-11-18 10:00:00'], dtype='datetime64[ns]', freq=None) + +*Current Behavior*: + +.. ipython:: python + + pd.to_datetime("2015-11-18 15:30:00+05:30") + pd.Timestamp("2015-11-18 15:30:00+05:30") + +Parsing datetime strings with the same UTC offset will preserve the UTC offset in the ``tz`` + +.. ipython:: python + + pd.to_datetime(["2015-11-18 15:30:00+05:30"] * 2) + +Parsing datetime strings with different UTC offsets will now create an Index of +``datetime.datetime`` objects with different UTC offsets + +.. ipython:: python + + idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"]) + idx + idx[0] + idx[1] + +Passing ``utc=True`` will mimic the previous behavior but will correctly indicate +that the dates have been converted to UTC + +.. ipython:: python + pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) .. _whatsnew_0240.api.datetimelike.normalize: @@ -439,6 +495,7 @@ Datetimelike - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) - Fixed bug where :meth:`Timestamp.resolution` incorrectly returned 1-microsecond ``timedelta`` instead of 1-nanosecond :class:`Timedelta` (:issue:`21336`,:issue:`21365`) +- Bug in :func:`to_datetime` that did not consistently return an :class:`Index` when ``box=True`` was specified (:issue:`21864`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index acf6cd4b74362..76e3d6e92d31e 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -19,6 +19,7 @@ import numpy as np cnp.import_array() import pytz +from dateutil.tz import tzlocal, tzutc as dateutil_utc from util cimport (is_integer_object, is_float_object, is_string_object, @@ -328,7 +329,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): if unit == 'ns': if issubclass(values.dtype.type, np.integer): return values.astype('M8[ns]') - return array_to_datetime(values.astype(object), errors=errors) + return array_to_datetime(values.astype(object), errors=errors)[0] m = cast_from_unit(None, unit) @@ -457,9 +458,43 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', dayfirst=False, yearfirst=False, format=None, utc=None, require_iso8601=False): + """ + Converts a 1D array of date-like values to a numpy array of either: + 1) datetime64[ns] data + 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError + is encountered + + Also returns a pytz.FixedOffset if an array of strings with the same + timezone offset is passed and utc=True is not passed. Otherwise, None + is returned + + Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric, + strings + + Parameters + ---------- + values : ndarray of object + date-like objects to convert + errors : str, default 'raise' + error behavior when parsing + dayfirst : bool, default False + dayfirst parsing behavior when encountering datetime strings + yearfirst : bool, default False + yearfirst parsing behavior when encountering datetime strings + format : str, default None + format of the string to parse + utc : bool, default None + indicator whether the dates should be UTC + require_iso8601 : bool, default False + indicator whether the datetime string should be iso8601 + + Returns + ------- + tuple (ndarray, tzoffset) + """ cdef: Py_ssize_t i, n = len(values) - object val, py_dt + object val, py_dt, tz, tz_out = None ndarray[int64_t] iresult ndarray[object] oresult npy_datetimestruct dts @@ -467,11 +502,14 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', bint seen_integer = 0 bint seen_string = 0 bint seen_datetime = 0 + bint seen_datetime_offset = 0 bint is_raise = errors=='raise' bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' _TSObject _ts int out_local=0, out_tzoffset=0 + float offset_seconds + set out_tzoffset_vals = set() # specify error conditions assert is_raise or is_ignore or is_coerce @@ -584,7 +622,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise ValueError("time data {val} doesn't match " "format specified" .format(val=val)) - return values + return values, tz_out try: py_dt = parse_datetime_string(val, dayfirst=dayfirst, @@ -595,6 +633,30 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', continue raise TypeError("invalid string coercion to datetime") + # If the dateutil parser returned tzinfo, capture it + # to check if all arguments have the same tzinfo + tz = py_dt.tzinfo + if tz is not None: + seen_datetime_offset = 1 + if tz == dateutil_utc(): + # dateutil.tz.tzutc has no offset-like attribute + # Just add the 0 offset explicitly + out_tzoffset_vals.add(0) + elif tz == tzlocal(): + # is comparison fails unlike other dateutil.tz + # objects. Also, dateutil.tz.tzlocal has no + # _offset attribute like tzoffset + offset_seconds = tz._dst_offset.total_seconds() + out_tzoffset_vals.add(offset_seconds) + else: + # dateutil.tz.tzoffset objects cannot be hashed + # store the total_seconds() instead + offset_seconds = tz._offset.total_seconds() + out_tzoffset_vals.add(offset_seconds) + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add('naive') try: _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value @@ -614,8 +676,17 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # where we left off value = dtstruct_to_dt64(&dts) if out_local == 1: + seen_datetime_offset = 1 + # Store the out_tzoffset in seconds + # since we store the total_seconds of + # dateutil.tz.tzoffset objects + out_tzoffset_vals.add(out_tzoffset * 60.) tz = pytz.FixedOffset(out_tzoffset) value = tz_convert_single(value, tz, 'UTC') + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add('naive') iresult[i] = value try: check_dts_bounds(&dts) @@ -631,7 +702,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise ValueError("time data {val} doesn't " "match format specified" .format(val=val)) - return values + return values, tz_out raise else: @@ -657,7 +728,21 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', else: raise TypeError - return result + if seen_datetime_offset and not utc_convert: + # GH 17697 + # 1) If all the offsets are equal, return one offset for + # the parsed dates to (maybe) pass to DatetimeIndex + # 2) If the offsets are different, then force the parsing down the + # object path where an array of datetimes + # (with individual dateutil.tzoffsets) are returned + is_same_offsets = len(out_tzoffset_vals) == 1 + if not is_same_offsets: + return array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) + else: + tz_offset = out_tzoffset_vals.pop() + tz_out = pytz.FixedOffset(tz_offset / 60.) + return result, tz_out except OutOfBoundsDatetime: if is_raise: raise @@ -679,36 +764,67 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', oresult[i] = val.item() else: oresult[i] = val - return oresult + return oresult, tz_out except TypeError: - oresult = np.empty(n, dtype=object) + return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) - for i in range(n): - val = values[i] - if checknull_with_nat(val): - oresult[i] = val - elif is_string_object(val): - if len(val) == 0 or val in nat_strings: - oresult[i] = 'NaT' - continue +cdef array_to_datetime_object(ndarray[object] values, bint is_raise, + dayfirst=False, yearfirst=False): + """ + Fall back function for array_to_datetime - try: - oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, - yearfirst=yearfirst) - pydatetime_to_dt64(oresult[i], &dts) - check_dts_bounds(&dts) - except Exception: - if is_raise: - raise - return values - # oresult[i] = val - else: + Attempts to parse datetime strings with dateutil to return an array + of datetime objects + + Parameters + ---------- + values : ndarray of object + date-like objects to convert + is_raise : bool + error behavior when parsing + dayfirst : bool, default False + dayfirst parsing behavior when encountering datetime strings + yearfirst : bool, default False + yearfirst parsing behavior when encountering datetime strings + + Returns + ------- + tuple (ndarray, None) + """ + cdef: + Py_ssize_t i, n = len(values) + object val, + ndarray[object] oresult + npy_datetimestruct dts + + oresult = np.empty(n, dtype=object) + + # We return an object array and only attempt to parse: + # 1) NaT or NaT-like values + # 2) datetime strings, which we return as datetime.datetime + for i in range(n): + val = values[i] + if checknull_with_nat(val): + oresult[i] = val + elif is_string_object(val): + if len(val) == 0 or val in nat_strings: + oresult[i] = 'NaT' + continue + try: + oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, + yearfirst=yearfirst) + pydatetime_to_dt64(oresult[i], &dts) + check_dts_bounds(&dts) + except (ValueError, OverflowError): if is_raise: raise - return values - - return oresult + return values, None + else: + if is_raise: + raise + return values, None + return oresult, None cdef inline bint _parse_today_now(str val, int64_t* iresult): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ead7b39309f5e..e369679d2146f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -918,7 +918,7 @@ def try_datetime(v): # GH19671 v = tslib.array_to_datetime(v, require_iso8601=True, - errors='raise') + errors='raise')[0] except ValueError: # we might have a sequence of the same-datetimes with tz's diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index be042c9bf8ab0..90a083557a662 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -23,7 +23,8 @@ is_float, is_list_like, is_scalar, - is_numeric_dtype) + is_numeric_dtype, + is_object_dtype) from pandas.core.dtypes.generic import ( ABCIndexClass, ABCSeries, ABCDataFrame) @@ -266,7 +267,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, result = arg if result is None and (format is None or infer_datetime_format): - result = tslib.array_to_datetime( + result, tz_parsed = tslib.array_to_datetime( arg, errors=errors, utc=tz == 'utc', @@ -274,9 +275,16 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, yearfirst=yearfirst, require_iso8601=require_iso8601 ) + if tz_parsed is not None and box: + return DatetimeIndex._simple_new(result, name=name, + tz=tz_parsed) - if is_datetime64_dtype(result) and box: - result = DatetimeIndex(result, tz=tz, name=name) + if box: + if is_datetime64_dtype(result): + return DatetimeIndex(result, tz=tz, name=name) + elif is_object_dtype(result): + from pandas import Index + return Index(result, name=name) return result except ValueError as e: @@ -404,7 +412,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, datetime.datetime objects as well). box : boolean, default True - - If True returns a DatetimeIndex + - If True returns a DatetimeIndex or Index-like object - If False returns ndarray of values. format : string, default None strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse @@ -696,7 +704,7 @@ def calc(carg): parsed = parsing.try_parse_year_month_day(carg / 10000, carg / 100 % 100, carg % 100) - return tslib.array_to_datetime(parsed, errors=errors) + return tslib.array_to_datetime(parsed, errors=errors)[0] def calc_with_mask(carg, mask): result = np.empty(carg.shape, dtype='M8[ns]') diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 3ad25ae73109e..9e3b606f31973 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -154,7 +154,7 @@ def test_to_csv_from_csv5(self): self.tzframe.to_csv(path) result = pd.read_csv(path, index_col=0, parse_dates=['A']) - converter = lambda c: to_datetime(result[c]).dt.tz_localize( + converter = lambda c: to_datetime(result[c]).dt.tz_convert( 'UTC').dt.tz_convert(self.tzframe[c].dt.tz) result['B'] = converter('B') result['C'] = converter('C') @@ -1027,12 +1027,11 @@ def test_to_csv_with_dst_transitions(self): time_range = np.array(range(len(i)), dtype='int64') df = DataFrame({'A': time_range}, index=i) df.to_csv(path, index=True) - # we have to reconvert the index as we # don't parse the tz's result = read_csv(path, index_col=0) - result.index = to_datetime(result.index).tz_localize( - 'UTC').tz_convert('Europe/London') + result.index = to_datetime(result.index, utc=True).tz_convert( + 'Europe/London') assert_frame_equal(result, df) # GH11619 @@ -1043,9 +1042,9 @@ def test_to_csv_with_dst_transitions(self): with ensure_clean('csv_date_format_with_dst') as path: df.to_csv(path, index=True) result = read_csv(path, index_col=0) - result.index = to_datetime(result.index).tz_localize( - 'UTC').tz_convert('Europe/Paris') - result['idx'] = to_datetime(result['idx']).astype( + result.index = to_datetime(result.index, utc=True).tz_convert( + 'Europe/Paris') + result['idx'] = to_datetime(result['idx'], utc=True).astype( 'datetime64[ns, Europe/Paris]') assert_frame_equal(result, df) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 4ce2b1dd4fd86..1e54e6563d598 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -423,13 +423,13 @@ def test_dti_shift_tzaware(self, tz_naive_fixture): tm.assert_index_equal(idx.shift(0, freq='H'), idx) tm.assert_index_equal(idx.shift(3, freq='H'), idx) - idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00' + idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00'], name='xxx', tz=tz) tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00' + exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00', '2011-01-01 15:00'], name='xxx', tz=tz) tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00' + exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00', '2011-01-01 09:00'], name='xxx', tz=tz) tm.assert_index_equal(idx.shift(-3, freq='H'), exp) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 3697d183d2fc6..67eb81336f648 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -317,8 +317,8 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): result = index.tz_localize(tz=tz, errors='coerce') test_times = ['2015-03-08 01:00-05:00', 'NaT', '2015-03-08 03:00-04:00'] - dti = DatetimeIndex(test_times) - expected = dti.tz_localize('UTC').tz_convert('US/Eastern') + dti = to_datetime(test_times, utc=True) + expected = dti.tz_convert('US/Eastern') tm.assert_index_equal(result, expected) @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index fa9f9fc90387a..72e5358f21966 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -7,6 +7,7 @@ import dateutil import numpy as np from dateutil.parser import parse +from dateutil.tz.tz import tzoffset from datetime import datetime, time from distutils.version import LooseVersion @@ -483,7 +484,7 @@ def test_to_datetime_tz_psycopg2(self, cache): # dtype coercion i = pd.DatetimeIndex([ - '2000-01-01 08:00:00+00:00' + '2000-01-01 08:00:00' ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) assert is_datetime64_ns_dtype(i) @@ -577,6 +578,48 @@ def test_week_without_day_and_calendar_year(self, date, format): with tm.assert_raises_regex(ValueError, msg): pd.to_datetime(date, format=format) + def test_iso_8601_strings_with_same_offset(self): + # GH 17697, 11736 + ts_str = "2015-11-18 15:30:00+05:30" + result = to_datetime(ts_str) + expected = Timestamp(ts_str) + assert result == expected + + expected = DatetimeIndex([Timestamp(ts_str)] * 2) + result = to_datetime([ts_str] * 2) + tm.assert_index_equal(result, expected) + + result = DatetimeIndex([ts_str] * 2) + tm.assert_index_equal(result, expected) + + def test_iso_8601_strings_with_different_offsets(self): + # GH 17697, 11736 + ts_strings = ["2015-11-18 15:30:00+05:30", + "2015-11-18 16:30:00+06:30", + NaT] + result = to_datetime(ts_strings) + expected = np.array([datetime(2015, 11, 18, 15, 30, + tzinfo=tzoffset(None, 19800)), + datetime(2015, 11, 18, 16, 30, + tzinfo=tzoffset(None, 23400)), + NaT], + dtype=object) + # GH 21864 + expected = Index(expected) + tm.assert_index_equal(result, expected) + + result = to_datetime(ts_strings, utc=True) + expected = DatetimeIndex([Timestamp(2015, 11, 18, 10), + Timestamp(2015, 11, 18, 10), + NaT], tz='UTC') + tm.assert_index_equal(result, expected) + + def test_non_iso_strings_with_tz_offset(self): + result = to_datetime(['March 1, 2018 12:00:00+0400'] * 2) + expected = DatetimeIndex([datetime(2018, 3, 1, 12, + tzinfo=pytz.FixedOffset(240))] * 2) + tm.assert_index_equal(result, expected) + class TestToDatetimeUnit(object): @pytest.mark.parametrize('cache', [True, False]) @@ -978,14 +1021,19 @@ def test_to_datetime_types(self, cache): # assert result == expected @pytest.mark.parametrize('cache', [True, False]) - def test_to_datetime_unprocessable_input(self, cache): + @pytest.mark.parametrize('box, klass, assert_method', [ + [True, Index, 'assert_index_equal'], + [False, np.array, 'assert_numpy_array_equal'] + ]) + def test_to_datetime_unprocessable_input(self, cache, box, klass, + assert_method): # GH 4928 - tm.assert_numpy_array_equal( - to_datetime([1, '1'], errors='ignore', cache=cache), - np.array([1, '1'], dtype='O') - ) + # GH 21864 + result = to_datetime([1, '1'], errors='ignore', cache=cache, box=box) + expected = klass(np.array([1, '1'], dtype='O')) + getattr(tm, assert_method)(result, expected) pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise', - cache=cache) + cache=cache, box=box) def test_to_datetime_other_datetime64_units(self): # 5/25/2012 @@ -1031,7 +1079,7 @@ def test_string_na_nat_conversion(self, cache): else: expected[i] = parse_date(val) - result = tslib.array_to_datetime(strings) + result = tslib.array_to_datetime(strings)[0] tm.assert_almost_equal(result, expected) result2 = to_datetime(strings, cache=cache) @@ -1046,7 +1094,9 @@ def test_string_na_nat_conversion(self, cache): cache=cache)) result = to_datetime(malformed, errors='ignore', cache=cache) - tm.assert_numpy_array_equal(result, malformed) + # GH 21864 + expected = Index(malformed) + tm.assert_index_equal(result, expected) pytest.raises(ValueError, to_datetime, malformed, errors='raise', cache=cache) @@ -1495,23 +1545,19 @@ def test_parsers_time(self): assert res == expected_arr @pytest.mark.parametrize('cache', [True, False]) - def test_parsers_timezone_minute_offsets_roundtrip(self, cache): + @pytest.mark.parametrize('dt_string, tz, dt_string_repr', [ + ('2013-01-01 05:45+0545', pytz.FixedOffset(345), + "Timestamp('2013-01-01 05:45:00+0545', tz='pytz.FixedOffset(345)')"), + ('2013-01-01 05:30+0530', pytz.FixedOffset(330), + "Timestamp('2013-01-01 05:30:00+0530', tz='pytz.FixedOffset(330)')")]) + def test_parsers_timezone_minute_offsets_roundtrip(self, cache, dt_string, + tz, dt_string_repr): # GH11708 base = to_datetime("2013-01-01 00:00:00", cache=cache) - dt_strings = [ - ('2013-01-01 05:45+0545', - "Asia/Katmandu", - "Timestamp('2013-01-01 05:45:00+0545', tz='Asia/Katmandu')"), - ('2013-01-01 05:30+0530', - "Asia/Kolkata", - "Timestamp('2013-01-01 05:30:00+0530', tz='Asia/Kolkata')") - ] - - for dt_string, tz, dt_string_repr in dt_strings: - dt_time = to_datetime(dt_string, cache=cache) - assert base == dt_time - converted_time = dt_time.tz_localize('UTC').tz_convert(tz) - assert dt_string_repr == repr(converted_time) + base = base.tz_localize('UTC').tz_convert(tz) + dt_time = to_datetime(dt_string, cache=cache) + assert base == dt_time + assert dt_string_repr == repr(dt_time) @pytest.fixture(params=['D', 's', 'ms', 'us', 'ns']) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index a59836eb70d24..762b04cc3bd4f 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2324,7 +2324,7 @@ def test_concat_datetime_timezone(self): '2011-01-01 01:00:00+01:00', '2011-01-01 02:00:00+01:00'], freq='H' - ).tz_localize('UTC').tz_convert('Europe/Paris') + ).tz_convert('UTC').tz_convert('Europe/Paris') expected = pd.DataFrame([[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=['a', 'b']) @@ -2342,7 +2342,7 @@ def test_concat_datetime_timezone(self): '2010-12-31 23:00:00+00:00', '2011-01-01 00:00:00+00:00', '2011-01-01 01:00:00+00:00'] - ).tz_localize('UTC') + ) expected = pd.DataFrame([[np.nan, 1], [np.nan, 2], [np.nan, 3], [1, np.nan], [2, np.nan], [3, np.nan]], diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 7ce2aaf7d7fbb..796c637434353 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -332,7 +332,8 @@ def test_datetime64_dtype_array_returned(self): dt_index = pd.to_datetime(['2015-01-03T00:00:00.000000000+0000', '2015-01-01T00:00:00.000000000+0000', - '2015-01-01T00:00:00.000000000+0000']) + '2015-01-01T00:00:00.000000000+0000'], + box=False) result = algos.unique(dt_index) tm.assert_numpy_array_equal(result, expected) assert result.dtype == expected.dtype diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 31e5bd88523d2..b7530da36ed8b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -668,16 +668,15 @@ def test_value_counts_datetime64(self, klass): s = klass(df['dt'].copy()) s.name = None - - idx = pd.to_datetime(['2010-01-01 00:00:00Z', - '2008-09-09 00:00:00Z', - '2009-01-01 00:00:00Z']) + idx = pd.to_datetime(['2010-01-01 00:00:00', + '2008-09-09 00:00:00', + '2009-01-01 00:00:00']) expected_s = Series([3, 2, 1], index=idx) tm.assert_series_equal(s.value_counts(), expected_s) - expected = np_array_datetime64_compat(['2010-01-01 00:00:00Z', - '2009-01-01 00:00:00Z', - '2008-09-09 00:00:00Z'], + expected = np_array_datetime64_compat(['2010-01-01 00:00:00', + '2009-01-01 00:00:00', + '2008-09-09 00:00:00'], dtype='datetime64[ns]') if isinstance(s, Index): tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index d664a9060b684..1f70d09e43b37 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2681,8 +2681,8 @@ def test_resample_with_dst_time_change(self): '2016-03-14 13:00:00-05:00', '2016-03-15 01:00:00-05:00', '2016-03-15 13:00:00-05:00'] - index = pd.DatetimeIndex(expected_index_values, - tz='UTC').tz_convert('America/Chicago') + index = pd.to_datetime(expected_index_values, utc=True).tz_convert( + 'America/Chicago') expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], index=index) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index eb77e52e7c91d..915687304bfe2 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pytz +from dateutil.tz.tz import tzoffset from pandas._libs import tslib from pandas.compat.numpy import np_array_datetime64_compat @@ -52,7 +54,7 @@ def test_parsers_iso8601_invalid(self, date_str): class TestArrayToDatetime(object): def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) - result = tslib.array_to_datetime(arr) + result, _ = tslib.array_to_datetime(arr) expected = ['2013-01-01T00:00:00.000000000-0000', '2013-01-02T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( @@ -60,38 +62,60 @@ def test_parsing_valid_dates(self): np_array_datetime64_compat(expected, dtype='M8[ns]')) arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) - result = tslib.array_to_datetime(arr) + result, _ = tslib.array_to_datetime(arr) expected = ['2013-09-16T00:00:00.000000000-0000', '2013-09-17T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( result, np_array_datetime64_compat(expected, dtype='M8[ns]')) - @pytest.mark.parametrize('dt_string', [ - '01-01-2013 08:00:00+08:00', - '2013-01-01T08:00:00.000000000+0800', - '2012-12-31T16:00:00.000000000-0800', - '12-31-2012 23:00:00-01:00']) - def test_parsing_timezone_offsets(self, dt_string): + @pytest.mark.parametrize('dt_string, expected_tz', [ + ['01-01-2013 08:00:00+08:00', pytz.FixedOffset(480)], + ['2013-01-01T08:00:00.000000000+0800', pytz.FixedOffset(480)], + ['2012-12-31T16:00:00.000000000-0800', pytz.FixedOffset(-480)], + ['12-31-2012 23:00:00-01:00', pytz.FixedOffset(-60)]]) + def test_parsing_timezone_offsets(self, dt_string, expected_tz): # All of these datetime strings with offsets are equivalent # to the same datetime after the timezone offset is added arr = np.array(['01-01-2013 00:00:00'], dtype=object) - expected = tslib.array_to_datetime(arr) + expected, _ = tslib.array_to_datetime(arr) arr = np.array([dt_string], dtype=object) - result = tslib.array_to_datetime(arr) + result, result_tz = tslib.array_to_datetime(arr) tm.assert_numpy_array_equal(result, expected) + assert result_tz is expected_tz + + def test_parsing_non_iso_timezone_offset(self): + dt_string = '01-01-2013T00:00:00.000000000+0000' + arr = np.array([dt_string], dtype=object) + result, result_tz = tslib.array_to_datetime(arr) + expected = np.array([np.datetime64('2013-01-01 00:00:00.000000000')]) + tm.assert_numpy_array_equal(result, expected) + assert result_tz is pytz.FixedOffset(0) + + def test_parsing_different_timezone_offsets(self): + # GH 17697 + data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] + data = np.array(data, dtype=object) + result, result_tz = tslib.array_to_datetime(data) + expected = np.array([datetime(2015, 11, 18, 15, 30, + tzinfo=tzoffset(None, 19800)), + datetime(2015, 11, 18, 15, 30, + tzinfo=tzoffset(None, 23400))], + dtype=object) + tm.assert_numpy_array_equal(result, expected) + assert result_tz is None def test_number_looking_strings_not_into_datetime(self): # GH#4601 # These strings don't look like datetimes so they shouldn't be # attempted to be converted arr = np.array(['-352.737091', '183.575577'], dtype=object) - result = tslib.array_to_datetime(arr, errors='ignore') + result, _ = tslib.array_to_datetime(arr, errors='ignore') tm.assert_numpy_array_equal(result, arr) arr = np.array(['1', '2', '3', '4', '5'], dtype=object) - result = tslib.array_to_datetime(arr, errors='ignore') + result, _ = tslib.array_to_datetime(arr, errors='ignore') tm.assert_numpy_array_equal(result, arr) @pytest.mark.parametrize('invalid_date', [ @@ -105,13 +129,13 @@ def test_coerce_outside_ns_bounds(self, invalid_date): with pytest.raises(ValueError): tslib.array_to_datetime(arr, errors='raise') - result = tslib.array_to_datetime(arr, errors='coerce') + result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = np.array([tslib.iNaT], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) def test_coerce_outside_ns_bounds_one_valid(self): arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) - result = tslib.array_to_datetime(arr, errors='coerce') + result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = [tslib.iNaT, '2000-01-01T00:00:00.000000000-0000'] tm.assert_numpy_array_equal( @@ -123,11 +147,11 @@ def test_coerce_of_invalid_datetimes(self): # Without coercing, the presence of any invalid dates prevents # any values from being converted - result = tslib.array_to_datetime(arr, errors='ignore') + result, _ = tslib.array_to_datetime(arr, errors='ignore') tm.assert_numpy_array_equal(result, arr) # With coercing, the invalid dates becomes iNaT - result = tslib.array_to_datetime(arr, errors='coerce') + result, _ = tslib.array_to_datetime(arr, errors='coerce') expected = ['2013-01-01T00:00:00.000000000-0000', tslib.iNaT, tslib.iNaT]