BUG: to_json incorrectly localizes tz-naive datetimes to UTC #46730

Merged (13 commits) on May 4, 2022
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
@@ -563,6 +563,7 @@ I/O
- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`)
- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`)
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)
- Bug in :meth:`DataFrame.to_json`, :meth:`Series.to_json`, and :meth:`Index.to_json` where tz-naive datetimes were being incorrectly localized to UTC (:issue:`38760`)

Period
^^^^^^
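For reference, a quick sketch of the user-visible change described by the whatsnew entry above (the expected strings mirror the tests added below; this exact example is illustrative, not code from the PR):

```python
import pandas as pd

ser = pd.Series(pd.to_datetime(["2013-01-10 05:00:00"]))

# tz-naive: no trailing "Z" after this fix
print(ser.to_json(date_format="iso"))
# {"0":"2013-01-10T05:00:00.000"}

# tz-aware: still converted to UTC and suffixed with "Z" (unchanged behavior)
print(ser.dt.tz_localize("US/Eastern").to_json(date_format="iso"))
# {"0":"2013-01-10T10:00:00.000Z"}
```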
31 changes: 28 additions & 3 deletions pandas/_libs/src/ujson/python/date_conversions.c
@@ -54,8 +54,8 @@ char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) {
PyErr_NoMemory();
return NULL;
}

ret_code = make_iso_8601_datetime(&dts, result, *len, base);
ret_code = make_iso_8601_datetime(&dts, result, *len,
0 /* datetime64 is always naive */, base);
if (ret_code != 0) {
PyErr_SetString(PyExc_ValueError,
"Could not convert datetime value to string");
@@ -90,7 +90,32 @@ char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,

*len = (size_t)get_datetime_iso_8601_strlen(0, base);
char *result = PyObject_Malloc(*len);
ret = make_iso_8601_datetime(&dts, result, *len, base);
// Check to see if PyDateTime has a timezone.
// Don't convert to UTC if it doesn't.
int is_tz_aware = 0;
PyObject *tmp;
PyObject *offset;
if (PyObject_HasAttrString((PyObject*)obj, "tzinfo")) {
tmp = PyObject_GetAttrString(obj, "tzinfo");
if (tmp == NULL) {
return NULL;
Member:

Looks like we also need to PyObject_Free(result) before returning if things fail to avoid any leaks

}
if (tmp == Py_None) {
Py_DECREF(tmp);
} else {
offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj);
if (offset == NULL) {
Py_DECREF(tmp);
return NULL;
}
Py_DECREF(tmp);
if (offset != Py_None) {
is_tz_aware = 1;
}
Py_DECREF(offset);
}
}
ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base);

if (ret != 0) {
PyErr_SetString(PyExc_ValueError,
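The new tzinfo/utcoffset branch mirrors the stdlib's definition of an "aware" datetime. Roughly, in Python terms (a sketch of the logic, not code from this PR):

```python
from datetime import datetime, timezone

def is_tz_aware(dt: datetime) -> bool:
    # Aware only if tzinfo exists AND utcoffset() returns non-None,
    # matching the check added to PyDateTimeToIso above.
    return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None

print(is_tz_aware(datetime(2013, 1, 10, 5)))                       # False
print(is_tz_aware(datetime(2013, 1, 10, 5, tzinfo=timezone.utc)))  # True
```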
28 changes: 27 additions & 1 deletion pandas/_libs/src/ujson/python/objToJSON.c
@@ -221,8 +221,18 @@ static PyObject *get_values(PyObject *obj) {
// The special cases to worry about are dt64tz and category[dt64tz].
// In both cases we want the UTC-localized datetime64 ndarray,
// without going through an object array of Timestamps.
Member Author:

@WillAyd In my current (very rough) implementation of this, I've decided to force an object array of Timestamps (not sure what the perf/behavior implications of this would be).

Alternatively, I could try to just use the dt64 ndarray and grab the tz of the DTA (as the tz should be the same). Then, I can probably store this inside the JSONTypeContext and pass that through to the int64ToIso function. This muddles up the code a little, since numpy dt64 objects are supposed to be tz-naive and PyDateTimeToIso should really be handling this case.

Do you have a preference for one way or the other?
(also cc @jbrockmendel who may have an opinion)

Member:

I would avoid adding anything to JSONTypeContext as it's already way too overloaded. Is this not something we can just manage in the serializer?

Working with an ndarray should be much faster. However, there are also cases where we need to handle datetimes inside of an object array, so we need to be robust in how we manage that (you might want to add a test for the latter case).

Member Author:

I added the test. Can you elaborate on how to handle this in the serializer?
I don't see anywhere else where I could store the tz info other than in the TypeContext.

I guess the way it is handled now is fine, but it would be nice to avoid the performance hit from casting to object.

Member:

You might get away with storing this information in the NpyArrayContext since that already has the type number from NumPy. My hesitation with TypeContext is that it is already pretty sparsely overloaded for different types, so it's difficult to debug and figure out the intent of it within various contexts.

if (PyObject_HasAttrString(obj, "tz")) {
PyObject *tz = PyObject_GetAttrString(obj, "tz");
if (tz != Py_None) {
// Go through object array if we have dt64tz, since tz info will
// be lost if values is used directly.
Py_DECREF(tz);
values = PyObject_CallMethod(obj, "__array__", NULL);
return values;
}
Py_DECREF(tz);
}
values = PyObject_GetAttrString(obj, "values");

if (values == NULL) {
// Clear so we can subsequently try another method
PyErr_Clear();
@@ -707,6 +717,22 @@ void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {

for (i = 0; i < PyObject_Length(arrays); i++) {
array = PyList_GET_ITEM(arrays, i);
// tz information is lost when dt64tz is converted
// to numpy arrays.
// TODO(lithomas1): Patch column_arrays (actually values_for_json)
// to return EAs instead of casting to object
if (PyArray_TYPE((PyArrayObject *)array) == NPY_DATETIME) {
PyObject *mgr = PyObject_GetAttrString(obj, "_mgr");
PyObject *dtarr = PyObject_CallMethod(mgr, "iget_values", "n", i);
PyObject *tz = PyObject_GetAttrString(dtarr, "tz");
if (tz != Py_None) {
// we have a timezone, use an object array of Timestamp
array = PyObject_CallMethod(dtarr, "__array__", NULL);
}
Py_DECREF(mgr);
Py_DECREF(dtarr);
Py_DECREF(tz);
}
if (!array) {
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
goto ARR_RET;
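To illustrate the tradeoff discussed in the review thread above (a sketch assuming pandas 1.x semantics for .values and __array__, not code from this PR):

```python
import numpy as np
import pandas as pd

ser = pd.Series(pd.date_range("2022-01-01", periods=2, tz="US/Eastern"))

# .values drops the timezone and yields UTC-based datetime64[ns] values...
print(ser.values.dtype)  # datetime64[ns]

# ...while __array__ (what get_values and PdBlock_iterBegin now call)
# returns an object-dtype array of tz-aware Timestamps, preserving the tz
# at the cost of iterating Python objects instead of a native ndarray.
arr = np.asarray(ser)
print(arr.dtype)  # object
print(arr[0])     # 2022-01-01 00:00:00-05:00
```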
15 changes: 8 additions & 7 deletions pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
@@ -605,7 +605,7 @@ int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {
* string was too short).
*/
int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
NPY_DATETIMEUNIT base) {
int utc, NPY_DATETIMEUNIT base) {
char *substr = outstr;
int sublen = outlen;
int tmplen;
@@ -884,13 +884,14 @@ int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,

add_time_zone:
/* UTC "Zulu" time */
if (sublen < 1) {
goto string_too_short;
if (utc) {
if (sublen < 1) {
goto string_too_short;
}
substr[0] = 'Z';
substr += 1;
sublen -= 1;
}
substr[0] = 'Z';
substr += 1;
sublen -= 1;

/* Add a NULL terminator, and return */
if (sublen > 0) {
substr[0] = '\0';
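Gating the "Zulu" suffix on the new utc flag matches the stdlib convention for ISO 8601 output, where only aware datetimes carry an offset designator (shown purely as an analogy; make_iso_8601_datetime emits "Z" rather than "+00:00"):

```python
from datetime import datetime, timezone

naive = datetime(2016, 1, 1)
aware = datetime(2016, 1, 1, tzinfo=timezone.utc)

print(naive.isoformat())  # 2016-01-01T00:00:00       (no designator)
print(aware.isoformat())  # 2016-01-01T00:00:00+00:00
```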
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
@@ -78,7 +78,7 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
*/
int
make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
NPY_DATETIMEUNIT base);
int utc, NPY_DATETIMEUNIT base);

/*
* Converts an pandas_timedeltastruct to an ISO 8601 string.
14 changes: 7 additions & 7 deletions pandas/tests/io/json/test_json_table_schema.py
@@ -301,7 +301,7 @@ def test_to_json(self, df_table):
("idx", 0),
("A", 1),
("B", "a"),
("C", "2016-01-01T00:00:00.000Z"),
("C", "2016-01-01T00:00:00.000"),
("D", "P0DT1H0M0S"),
("E", "a"),
("F", "a"),
@@ -314,7 +314,7 @@ def test_to_json(self, df_table):
("idx", 1),
("A", 2),
("B", "b"),
("C", "2016-01-02T00:00:00.000Z"),
("C", "2016-01-02T00:00:00.000"),
("D", "P0DT1H1M0S"),
("E", "b"),
("F", "b"),
@@ -327,7 +327,7 @@ def test_to_json(self, df_table):
("idx", 2),
("A", 3),
("B", "c"),
("C", "2016-01-03T00:00:00.000Z"),
("C", "2016-01-03T00:00:00.000"),
("D", "P0DT1H2M0S"),
("E", "c"),
("F", "c"),
@@ -340,7 +340,7 @@ def test_to_json(self, df_table):
("idx", 3),
("A", 4),
("B", "c"),
("C", "2016-01-04T00:00:00.000Z"),
("C", "2016-01-04T00:00:00.000"),
("D", "P0DT1H3M0S"),
("E", "c"),
("F", "c"),
@@ -397,8 +397,8 @@ def test_to_json_period_index(self):

schema = {"fields": fields, "primaryKey": ["index"]}
data = [
OrderedDict([("index", "2015-11-01T00:00:00.000Z"), ("values", 1)]),
OrderedDict([("index", "2016-02-01T00:00:00.000Z"), ("values", 1)]),
OrderedDict([("index", "2015-11-01T00:00:00.000"), ("values", 1)]),
OrderedDict([("index", "2016-02-01T00:00:00.000"), ("values", 1)]),
]
expected = OrderedDict([("schema", schema), ("data", data)])

@@ -635,7 +635,7 @@ def test_timestamp_in_columns(self):
)
result = df.to_json(orient="table")
js = json.loads(result)
assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000Z"
assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000"
assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"

@pytest.mark.parametrize(
4 changes: 2 additions & 2 deletions pandas/tests/io/json/test_json_table_schema_ext_dtype.py
@@ -151,7 +151,7 @@ def test_build_date_series(self):
expected = OrderedDict(
[
("schema", schema),
("data", [OrderedDict([("id", 0), ("a", "2021-10-10T00:00:00.000Z")])]),
("data", [OrderedDict([("id", 0), ("a", "2021-10-10T00:00:00.000")])]),
]
)

@@ -256,7 +256,7 @@ def test_to_json(self):
OrderedDict(
[
("idx", 0),
("A", "2021-10-10T00:00:00.000Z"),
("A", "2021-10-10T00:00:00.000"),
("B", 10.0),
("C", "pandas"),
("D", 10),
31 changes: 26 additions & 5 deletions pandas/tests/io/json/test_pandas.py
@@ -822,7 +822,7 @@ def test_date_index_and_values(self, date_format, as_object, date_typ):
expected = '{"1577836800000":1577836800000,"null":null}'
else:
expected = (
'{"2020-01-01T00:00:00.000Z":"2020-01-01T00:00:00.000Z","null":null}'
'{"2020-01-01T00:00:00.000":"2020-01-01T00:00:00.000","null":null}'
)

if as_object:
@@ -875,8 +875,6 @@ def test_date_format_frame(self, date, date_unit, datetime_frame):
json = df.to_json(date_format="iso")
result = read_json(json)
expected = df.copy()
expected.index = expected.index.tz_localize("UTC")
expected["date"] = expected["date"].dt.tz_localize("UTC")
tm.assert_frame_equal(result, expected)

def test_date_format_frame_raises(self, datetime_frame):
Expand Down Expand Up @@ -905,8 +903,6 @@ def test_date_format_series(self, date, date_unit, datetime_series):
json = ts.to_json(date_format="iso")
result = read_json(json, typ="series")
expected = ts.copy()
expected.index = expected.index.tz_localize("UTC")
expected = expected.dt.tz_localize("UTC")
tm.assert_series_equal(result, expected)

def test_date_format_series_raises(self, datetime_series):
@@ -1192,6 +1188,16 @@ def test_tz_is_utc(self, ts):
dt = ts.to_pydatetime()
assert dumps(dt, iso_dates=True) == exp

def test_tz_is_naive(self):
from pandas.io.json import dumps

ts = Timestamp("2013-01-10 05:00:00")
exp = '"2013-01-10T05:00:00.000"'

assert dumps(ts, iso_dates=True) == exp
dt = ts.to_pydatetime()
assert dumps(dt, iso_dates=True) == exp

@pytest.mark.parametrize(
"tz_range",
[
@@ -1217,6 +1223,21 @@ def test_tz_range_is_utc(self, tz_range):
result = dumps(df, iso_dates=True)
assert result == dfexp

def test_tz_range_is_naive(self):
from pandas.io.json import dumps

tz_range = pd.date_range("2013-01-01 05:00:00", periods=2)

exp = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
dfexp = '{"DT":{"0":"2013-01-01T05:00:00.000","1":"2013-01-02T05:00:00.000"}}'

assert dumps(tz_range, iso_dates=True) == exp
dti = DatetimeIndex(tz_range)
assert dumps(dti, iso_dates=True) == exp
df = DataFrame({"DT": dti})
result = dumps(df, iso_dates=True)
assert result == dfexp

def test_read_inline_jsonl(self):
# GH9180
result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)