Skip to content

Commit 64c1127

Browse files
mroeschke authored and WillAyd committed
BUG: read_json converted date strings with Z to UTC (#26170)
1 parent 5eead57 commit 64c1127

File tree

5 files changed

+67
-19
lines changed

5 files changed

+67
-19
lines changed

doc/source/whatsnew/v0.25.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ Timezones
292292
- Bug in :func:`Series.at` where setting :class:`Timestamp` with timezone raises ``TypeError`` (:issue:`25506`)
293293
- Bug in :func:`DataFrame.update` when updating with timezone aware data would return timezone naive data (:issue:`25807`)
294294
- Bug in :func:`to_datetime` where an uninformative ``RuntimeError`` was raised when passing a naive :class:`Timestamp` with datetime strings with mixed UTC offsets (:issue:`25978`)
295+
- Bug in :func:`to_datetime` where passing ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`)
295296

296297
Numeric
297298
^^^^^^^
@@ -373,6 +374,7 @@ I/O
373374
- Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`)
374375
- Adds ``use_bqstorage_api`` parameter to :func:`read_gbq` to speed up downloads of large data frames. This feature requires version 0.10.0 of the ``pandas-gbq`` library as well as the ``google-cloud-bigquery-storage`` and ``fastavro`` libraries. (:issue:`26104`)
375376
- Fixed memory leak in :meth:`DataFrame.to_json` when dealing with numeric data (:issue:`24889`)
377+
- Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`)
376378

377379
Plotting
378380
^^^^^^^^

pandas/_libs/tslib.pyx

+11-5
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,10 @@ def array_with_unit_to_datetime(ndarray values, object unit,
311311
- ignore: return non-convertible values as the same unit
312312
- coerce: NaT for non-convertibles
313313
314+
Returns
315+
-------
316+
result : ndarray of m8 values
317+
tz : parsed timezone offset or None
314318
"""
315319
cdef:
316320
Py_ssize_t i, j, n=len(values)
@@ -323,13 +327,15 @@ def array_with_unit_to_datetime(ndarray values, object unit,
323327
bint need_to_iterate = True
324328
ndarray[int64_t] iresult
325329
ndarray[object] oresult
330+
object tz = None
326331

327332
assert is_ignore or is_coerce or is_raise
328333

329334
if unit == 'ns':
330335
if issubclass(values.dtype.type, np.integer):
331-
return values.astype('M8[ns]')
332-
return array_to_datetime(values.astype(object), errors=errors)[0]
336+
return values.astype('M8[ns]'), tz
337+
# This will return a tz
338+
return array_to_datetime(values.astype(object), errors=errors)
333339

334340
m = cast_from_unit(None, unit)
335341

@@ -357,7 +363,7 @@ def array_with_unit_to_datetime(ndarray values, object unit,
357363
result = (iresult * m).astype('M8[ns]')
358364
iresult = result.view('i8')
359365
iresult[mask] = NPY_NAT
360-
return result
366+
return result, tz
361367

362368
result = np.empty(n, dtype='M8[ns]')
363369
iresult = result.view('i8')
@@ -419,7 +425,7 @@ def array_with_unit_to_datetime(ndarray values, object unit,
419425

420426
iresult[i] = NPY_NAT
421427

422-
return result
428+
return result, tz
423429

424430
except AssertionError:
425431
pass
@@ -451,7 +457,7 @@ def array_with_unit_to_datetime(ndarray values, object unit,
451457
else:
452458
oresult[i] = val
453459

454-
return oresult
460+
return oresult, tz
455461

456462

457463
@cython.wraparound(False)

pandas/core/tools/datetimes.py

+17-9
Original file line numberDiff line numberDiff line change
@@ -200,19 +200,27 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
200200
if format is not None:
201201
raise ValueError("cannot specify both format and unit")
202202
arg = getattr(arg, 'values', arg)
203-
result = tslib.array_with_unit_to_datetime(arg, unit,
204-
errors=errors)
203+
result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit,
204+
errors=errors)
205205
if box:
206206
if errors == 'ignore':
207207
from pandas import Index
208208
result = Index(result, name=name)
209-
# GH 23758: We may still need to localize the result with tz
210-
try:
211-
return result.tz_localize(tz)
212-
except AttributeError:
213-
return result
214-
215-
return DatetimeIndex(result, tz=tz, name=name)
209+
else:
210+
result = DatetimeIndex(result, name=name)
211+
# GH 23758: We may still need to localize the result with tz
212+
# GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
213+
# result will be naive but in UTC
214+
try:
215+
result = result.tz_localize('UTC').tz_convert(tz_parsed)
216+
except AttributeError:
217+
# Regular Index from 'ignore' path
218+
return result
219+
if tz is not None:
220+
if result.tz is None:
221+
result = result.tz_localize(tz)
222+
else:
223+
result = result.tz_convert(tz)
216224
return result
217225
elif getattr(arg, 'ndim', 1) > 1:
218226
raise TypeError('arg must be a string, datetime, list, tuple, '

pandas/tests/indexes/datetimes/test_tools.py

+13
Original file line numberDiff line numberDiff line change
@@ -1980,3 +1980,16 @@ def test_processing_order(self):
19801980
result = pd.to_datetime(300 * 365, unit='D', origin='1870-01-01')
19811981
expected = Timestamp('2169-10-20 00:00:00')
19821982
assert result == expected
1983+
1984+
@pytest.mark.parametrize('offset,utc,exp', [
1985+
["Z", True, "2019-01-01T00:00:00.000Z"],
1986+
["Z", None, "2019-01-01T00:00:00.000Z"],
1987+
["-01:00", True, "2019-01-01T01:00:00.000Z"],
1988+
["-01:00", None, "2019-01-01T00:00:00.000-01:00"],
1989+
])
1990+
def test_arg_tz_ns_unit(self, offset, utc, exp):
1991+
# GH 25546
1992+
arg = "2019-01-01T00:00:00.000" + offset
1993+
result = to_datetime([arg], unit='ns', utc=utc)
1994+
expected = to_datetime([exp])
1995+
tm.assert_index_equal(result, expected)

pandas/tests/io/json/test_pandas.py

+24-5
Original file line numberDiff line numberDiff line change
@@ -762,7 +762,10 @@ def test_w_date(date, date_unit=None):
762762
else:
763763
json = df.to_json(date_format='iso')
764764
result = read_json(json)
765-
assert_frame_equal(result, df)
765+
expected = df.copy()
766+
expected.index = expected.index.tz_localize('UTC')
767+
expected['date'] = expected['date'].dt.tz_localize('UTC')
768+
assert_frame_equal(result, expected)
766769

767770
test_w_date('20130101 20:43:42.123')
768771
test_w_date('20130101 20:43:42', date_unit='s')
@@ -784,7 +787,10 @@ def test_w_date(date, date_unit=None):
784787
else:
785788
json = ts.to_json(date_format='iso')
786789
result = read_json(json, typ='series')
787-
assert_series_equal(result, ts)
790+
expected = ts.copy()
791+
expected.index = expected.index.tz_localize('UTC')
792+
expected = expected.dt.tz_localize('UTC')
793+
assert_series_equal(result, expected)
788794

789795
test_w_date('20130101 20:43:42.123')
790796
test_w_date('20130101 20:43:42', date_unit='s')
@@ -880,11 +886,15 @@ def test_round_trip_exception_(self):
880886

881887
@network
882888
@pytest.mark.single
883-
def test_url(self):
889+
@pytest.mark.parametrize('field,dtype', [
890+
['created_at', pd.DatetimeTZDtype(tz='UTC')],
891+
['closed_at', 'datetime64[ns]'],
892+
['updated_at', pd.DatetimeTZDtype(tz='UTC')]
893+
])
894+
def test_url(self, field, dtype):
884895
url = 'https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5' # noqa
885896
result = read_json(url, convert_dates=True)
886-
for c in ['created_at', 'closed_at', 'updated_at']:
887-
assert result[c].dtype == 'datetime64[ns]'
897+
assert result[field].dtype == dtype
888898

889899
def test_timedelta(self):
890900
converter = lambda x: pd.to_timedelta(x, unit='ms')
@@ -1298,3 +1308,12 @@ def test_index_false_from_json_to_json(self, orient, index):
12981308
dfjson = expected.to_json(orient=orient, index=index)
12991309
result = read_json(dfjson, orient=orient)
13001310
assert_frame_equal(result, expected)
1311+
1312+
def test_read_timezone_information(self):
1313+
# GH 25546
1314+
result = read_json('{"2019-01-01T11:00:00.000Z":88}',
1315+
typ='series', orient='index')
1316+
expected = Series([88],
1317+
index=DatetimeIndex(['2019-01-01 11:00:00'],
1318+
tz='UTC'))
1319+
assert_series_equal(result, expected)

0 commit comments

Comments
 (0)