diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 957b4f6284d74..e8a774579b8ae 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -290,6 +290,7 @@ Timezones - Bug in :func:`Series.at` where setting :class:`Timestamp` with timezone raises ``TypeError`` (:issue:`25506`) - Bug in :func:`DataFrame.update` when updating with timezone aware data would return timezone naive data (:issue:`25807`) - Bug in :func:`to_datetime` where an uninformative ``RuntimeError`` was raised when passing a naive :class:`Timestamp` with datetime strings with mixed UTC offsets (:issue:`25978`) +- Bug in :func:`to_datetime` where passing ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`) Numeric ^^^^^^^ @@ -371,6 +372,7 @@ I/O - Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`) - Adds ``use_bqstorage_api`` parameter to :func:`read_gbq` to speed up downloads of large data frames. This feature requires version 0.10.0 of the ``pandas-gbq`` library as well as the ``google-cloud-bigquery-storage`` and ``fastavro`` libraries. 
(:issue:`26104`) - Fixed memory leak in :meth:`DataFrame.to_json` when dealing with numeric data (:issue:`24889`) +- Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`) Plotting ^^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 50e3fb1c38cc7..89b24b81b5964 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -311,6 +311,10 @@ def array_with_unit_to_datetime(ndarray values, object unit, - ignore: return non-convertible values as the same unit - coerce: NaT for non-convertibles + Returns + ------- + result : ndarray of M8[ns] values or ndarray of objects + tz : parsed timezone offset or None """ cdef: Py_ssize_t i, j, n=len(values) @@ -323,13 +327,15 @@ def array_with_unit_to_datetime(ndarray values, object unit, bint need_to_iterate = True ndarray[int64_t] iresult ndarray[object] oresult + object tz = None assert is_ignore or is_coerce or is_raise if unit == 'ns': if issubclass(values.dtype.type, np.integer): - return values.astype('M8[ns]') - return array_to_datetime(values.astype(object), errors=errors)[0] + return values.astype('M8[ns]'), tz + # array_to_datetime already returns the (result, tz) pair + return array_to_datetime(values.astype(object), errors=errors) m = cast_from_unit(None, unit) @@ -357,7 +363,7 @@ def array_with_unit_to_datetime(ndarray values, object unit, result = (iresult * m).astype('M8[ns]') iresult = result.view('i8') iresult[mask] = NPY_NAT - return result + return result, tz result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') @@ -419,7 +425,7 @@ def array_with_unit_to_datetime(ndarray values, object unit, iresult[i] = NPY_NAT - return result + return result, tz except AssertionError: pass @@ -451,7 +457,7 @@ def array_with_unit_to_datetime(ndarray values, object unit, else: oresult[i] = val - return oresult + return oresult, tz @cython.wraparound(False) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1ad39e7ad357a..0b0916026cd30 100644 --- 
a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -200,19 +200,27 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) - result = tslib.array_with_unit_to_datetime(arg, unit, - errors=errors) + result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, + errors=errors) if box: if errors == 'ignore': from pandas import Index result = Index(result, name=name) - # GH 23758: We may still need to localize the result with tz - try: - return result.tz_localize(tz) - except AttributeError: - return result - - return DatetimeIndex(result, tz=tz, name=name) + else: + result = DatetimeIndex(result, name=name) + # GH 23758: We may still need to localize the result with tz + # GH 25546: Apply tz_parsed first (from arg), then tz (from caller) + # result will be naive but in UTC + try: + result = result.tz_localize('UTC').tz_convert(tz_parsed) + except AttributeError: + # Regular Index from 'ignore' path + return result + if tz is not None: + if result.tz is None: + result = result.tz_localize(tz) + else: + result = result.tz_convert(tz) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index d6627f0fb8b72..512f5e2b14821 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1967,3 +1967,16 @@ def test_processing_order(self): result = pd.to_datetime(300 * 365, unit='D', origin='1870-01-01') expected = Timestamp('2169-10-20 00:00:00') assert result == expected + + @pytest.mark.parametrize('offset,utc,exp', [ + ["Z", True, "2019-01-01T00:00:00.000Z"], + ["Z", None, "2019-01-01T00:00:00.000Z"], + ["-01:00", True, "2019-01-01T01:00:00.000Z"], + ["-01:00", None, "2019-01-01T00:00:00.000-01:00"], + ]) + 
def test_arg_tz_ns_unit(self, offset, utc, exp): + # GH 25546 + arg = "2019-01-01T00:00:00.000" + offset + result = to_datetime([arg], unit='ns', utc=utc) + expected = to_datetime([exp]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c18386b3f3700..40ca3cd9cfb1d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -762,7 +762,10 @@ def test_w_date(date, date_unit=None): else: json = df.to_json(date_format='iso') result = read_json(json) - assert_frame_equal(result, df) + expected = df.copy() + expected.index = expected.index.tz_localize('UTC') + expected['date'] = expected['date'].dt.tz_localize('UTC') + assert_frame_equal(result, expected) test_w_date('20130101 20:43:42.123') test_w_date('20130101 20:43:42', date_unit='s') @@ -784,7 +787,10 @@ def test_w_date(date, date_unit=None): else: json = ts.to_json(date_format='iso') result = read_json(json, typ='series') - assert_series_equal(result, ts) + expected = ts.copy() + expected.index = expected.index.tz_localize('UTC') + expected = expected.dt.tz_localize('UTC') + assert_series_equal(result, expected) test_w_date('20130101 20:43:42.123') test_w_date('20130101 20:43:42', date_unit='s') @@ -880,11 +886,15 @@ def test_round_trip_exception_(self): @network @pytest.mark.single - def test_url(self): + @pytest.mark.parametrize('field,dtype', [ + ['created_at', pd.DatetimeTZDtype(tz='UTC')], + ['closed_at', 'datetime64[ns]'], + ['updated_at', pd.DatetimeTZDtype(tz='UTC')] + ]) + def test_url(self, field, dtype): url = 'https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5' # noqa result = read_json(url, convert_dates=True) - for c in ['created_at', 'closed_at', 'updated_at']: - assert result[c].dtype == 'datetime64[ns]' + assert result[field].dtype == dtype def test_timedelta(self): converter = lambda x: pd.to_timedelta(x, unit='ms') @@ -1298,3 +1308,12 @@ def 
test_index_false_from_json_to_json(self, orient, index): dfjson = expected.to_json(orient=orient, index=index) result = read_json(dfjson, orient=orient) assert_frame_equal(result, expected) + + def test_read_timezone_information(self): + # GH 25546 + result = read_json('{"2019-01-01T11:00:00.000Z":88}', + typ='series', orient='index') + expected = Series([88], + index=DatetimeIndex(['2019-01-01 11:00:00'], + tz='UTC')) + assert_series_equal(result, expected)