diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index d7feb6e547b22..0f5ef1caf1d96 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -237,7 +237,7 @@ without timezone localization. This is inconsistent from parsing the same datetime string with :class:`Timestamp` which would preserve the UTC offset in the ``tz`` attribute. Now, :func:`to_datetime` preserves the UTC offset in the ``tz`` attribute when all the datetime strings have the same -UTC offset (:issue:`17697`, :issue:`11736`) +UTC offset (:issue:`17697`, :issue:`11736`, :issue:`22457`) *Previous Behavior*: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 90a083557a662..57387b9ea870a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -275,14 +275,25 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, yearfirst=yearfirst, require_iso8601=require_iso8601 ) - if tz_parsed is not None and box: - return DatetimeIndex._simple_new(result, name=name, - tz=tz_parsed) + if tz_parsed is not None: + if box: + # We can take a shortcut since the datetime64 numpy array + # is in UTC + return DatetimeIndex._simple_new(result, name=name, + tz=tz_parsed) + else: + # Convert the datetime64 numpy array to a numpy array + # of datetime objects + result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() + for ts in result] + return np.array(result, dtype=object) if box: + # Ensure we return an Index in all cases where box=True if is_datetime64_dtype(result): return DatetimeIndex(result, tz=tz, name=name) elif is_object_dtype(result): + # e.g. 
an Index of datetime objects from pandas import Index return Index(result, name=name) return result diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 72e5358f21966..bef9b73773f46 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -592,6 +592,17 @@ def test_iso_8601_strings_with_same_offset(self): result = DatetimeIndex([ts_str] * 2) tm.assert_index_equal(result, expected) + def test_iso_8601_strings_same_offset_no_box(self): + # GH 22446 + data = ['2018-01-04 09:01:00+09:00', '2018-01-04 09:02:00+09:00'] + result = pd.to_datetime(data, box=False) + expected = np.array([ + datetime(2018, 1, 4, 9, 1, tzinfo=pytz.FixedOffset(540)), + datetime(2018, 1, 4, 9, 2, tzinfo=pytz.FixedOffset(540)) + ], + dtype=object) + tm.assert_numpy_array_equal(result, expected) + def test_iso_8601_strings_with_different_offsets(self): # GH 17697, 11736 ts_strings = ["2015-11-18 15:30:00+05:30", diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 9a6fa70892e26..64d2e155aa9a9 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -330,10 +330,9 @@ def test_datetime64_dtype_array_returned(self): '2015-01-01T00:00:00.000000000+0000'], dtype='M8[ns]') - dt_index = pd.to_datetime(['2015-01-03T00:00:00.000000000+0000', - '2015-01-01T00:00:00.000000000+0000', - '2015-01-01T00:00:00.000000000+0000'], - box=False) + dt_index = pd.to_datetime(['2015-01-03T00:00:00.000000000', + '2015-01-01T00:00:00.000000000', + '2015-01-01T00:00:00.000000000']) result = algos.unique(dt_index) tm.assert_numpy_array_equal(result, expected) assert result.dtype == expected.dtype