BUG: to_json not serializing non-nanosecond numpy dt64 correctly #53757

Merged: 13 commits, Jul 25, 2023 (diff shown is from 11 commits).

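For context, a minimal reproduction sketch of the bug (illustrative; the printed strings show the expected shapes, not captured output):

    import numpy as np
    import pandas as pd

    # A second-resolution datetime column. Before this fix, the raw int64
    # (seconds since the epoch) was reinterpreted as nanoseconds during
    # serialization, producing a timestamp just after 1970-01-01.
    ser = pd.Series(np.array(["2023-01-01T11:22:33"], dtype="datetime64[s]"))
    print(ser.to_frame("date").to_json(date_format="iso", date_unit="s"))
    # fixed: {"date":{"0":"2023-01-01T11:22:33Z"}}
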
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
@@ -528,6 +528,7 @@ I/O
- Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`)
- Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
- Bug in :func:`read_sql` when reading multiple timezone aware columns with the same column name (:issue:`44421`)
- Bug in :meth:`DataFrame.to_json` where :class:`DatetimeArray`/:class:`DatetimeIndex` with non-nanosecond precision could not be serialized correctly (:issue:`53686`)
- Bug in :func:`read_xml` stripping whitespace in string data (:issue:`53811`)
- Bug in :meth:`DataFrame.to_html` where ``colspace`` was incorrectly applied in case of multi index columns (:issue:`53885`)
- Bug when writing and reading empty Stata dta files where dtype information was lost (:issue:`46240`)
5 changes: 4 additions & 1 deletion pandas/_libs/include/pandas/datetime/date_conversions.h
@@ -18,7 +18,10 @@ int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit);
// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z
// while base="ns" yields "2020-01-01T00:00:00.000000000Z"
// len is mutated to save the length of the returned string
char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len);
char *int64ToIso(int64_t value,
NPY_DATETIMEUNIT valueUnit,
NPY_DATETIMEUNIT base,
size_t *len);

// TODO(username): this function doesn't do a lot; should augment or
// replace with scaleNanosecToUnit
6 changes: 3 additions & 3 deletions pandas/_libs/include/pandas/datetime/pd_datetime.h
@@ -34,7 +34,7 @@ typedef struct {
npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT,
const npy_datetimestruct *);
int (*scaleNanosecToUnit)(npy_int64 *, NPY_DATETIMEUNIT);
char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, size_t *);
char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, NPY_DATETIMEUNIT, size_t *);
npy_datetime (*NpyDateTimeToEpoch)(npy_datetime, NPY_DATETIMEUNIT);
char *(*PyDateTimeToIso)(PyObject *, NPY_DATETIMEUNIT, size_t *);
npy_datetime (*PyDateTimeToEpoch)(PyObject *, NPY_DATETIMEUNIT);
@@ -73,8 +73,8 @@ static PandasDateTime_CAPI *PandasDateTimeAPI = NULL;
(npy_datetimestruct))
#define scaleNanosecToUnit(value, unit) \
PandasDateTimeAPI->scaleNanosecToUnit((value), (unit))
#define int64ToIso(value, base, len) \
PandasDateTimeAPI->int64ToIso((value), (base), (len))
#define int64ToIso(value, valueUnit, base, len) \
PandasDateTimeAPI->int64ToIso((value), (valueUnit), (base), (len))
#define NpyDateTimeToEpoch(dt, base) \
PandasDateTimeAPI->NpyDateTimeToEpoch((dt), (base))
#define PyDateTimeToIso(obj, base, len) \
7 changes: 5 additions & 2 deletions pandas/_libs/src/datetime/date_conversions.c
@@ -41,11 +41,14 @@ int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) {
}

/* Converts the int64_t representation of a datetime to ISO; mutates len */
char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) {
char *int64ToIso(int64_t value,
NPY_DATETIMEUNIT valueUnit,
NPY_DATETIMEUNIT base,
size_t *len) {
npy_datetimestruct dts;
int ret_code;

pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts);
pandas_datetime_to_datetimestruct(value, valueUnit, &dts);

*len = (size_t)get_datetime_iso_8601_strlen(0, base);
char *result = PyObject_Malloc(*len);
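
To see the new `valueUnit`/`base` split from the Python side, a hedged sketch (the expected strings follow the header comment above and are illustrative):

    import numpy as np
    import pandas as pd

    # The stored resolution ("s", the valueUnit) and the requested output
    # precision (date_unit, the base) are now independent.
    ser = pd.Series(np.array(["2020-01-03"], dtype="datetime64[s]"))
    print(ser.to_frame("d").to_json(date_format="iso", date_unit="s"))
    # expected: {"d":{"0":"2020-01-03T00:00:00Z"}}
    print(ser.to_frame("d").to_json(date_format="iso", date_unit="ns"))
    # expected: {"d":{"0":"2020-01-03T00:00:00.000000000Z"}}
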
49 changes: 36 additions & 13 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
@@ -131,6 +131,7 @@ typedef struct __PyObjectEncoder {

int datetimeIso;
NPY_DATETIMEUNIT datetimeUnit;
NPY_DATETIMEUNIT valueUnit;

// output format style for pandas data types
int outputFormat;
@@ -350,7 +351,8 @@ static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc,
static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
JSONTypeContext *tc, size_t *len) {
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, base, len);
NPY_DATETIMEUNIT valueUnit = ((PyObjectEncoder *)tc->encoder)->valueUnit;
GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, valueUnit, base, len);
return GET_TC(tc)->cStr;
}

@@ -364,8 +366,9 @@ static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
/* JSON callback */
static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
size_t *len) {
if (!PyDate_Check(obj)) {
PyErr_SetString(PyExc_TypeError, "Expected date object");
if (!PyDate_Check(obj) && !PyDateTime_Check(obj)) {
PyErr_SetString(PyExc_TypeError, "Expected date or datetime object");
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
Member Author:

Off-topic, but we should be setting the errorMsg whenever we return NULL in a callback.

Otherwise, there is a possibility of a segfault, since ujson will still think it's in a valid state and try to continue encoding, instead of instantly returning (allowing the Python exception to propagate up).

return NULL;
}

@@ -502,6 +505,10 @@ int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {
GET_TC(tc)->itemValue = obj;
Py_INCREF(obj);
((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array);
// Also write the resolution (unit) of the ndarray
PyArray_Descr *dtype = PyArray_DESCR(npyarr->array);
((PyObjectEncoder *)tc->encoder)->valueUnit =
get_datetime_metadata_from_dtype(dtype).base;
((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr;
((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;
} else {
@@ -1255,6 +1262,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
char **ret;
char *dataptr, *cLabel;
int type_num;
PyArray_Descr *dtype;
NPY_DATETIMEUNIT base = enc->datetimeUnit;

if (!labels) {
@@ -1283,6 +1291,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
stride = PyArray_STRIDE(labels, 0);
dataptr = PyArray_DATA(labels);
type_num = PyArray_TYPE(labels);
dtype = PyArray_DESCR(labels);

for (i = 0; i < num; i++) {
item = PyArray_GETITEM(labels, dataptr);
@@ -1293,7 +1302,8 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
}

int is_datetimelike = 0;
npy_int64 nanosecVal;
npy_int64 i8date;
NPY_DATETIMEUNIT dateUnit = NPY_FR_ns;
Member:

Is the idea here that this should default to NS for things that are stored within object arrays?

Member Author:

It's for the other stuff like timedeltas and dates. We use nanosecond reso there.

I didn't try to fix those cases as well.

Member:

What do you think it would take to handle those consistently? I don't know how common this would be, but I guess it's strange to only do this for numpy-typed arrays and not object arrays that may contain datetimes.

Member:

Knee-jerk, I think that for object-dtype we should just call str(x) on everything and call it a day.

Member:

The object stuff definitely adds a lot of complexity that may not be worth it, but just passing everything as a str would be a breaking change. In particular, that would not work for nested containers within an object array.

Member Author:

I think it might work for object arrays already.

I haven't checked yet, I'm not on the right branch. Will update later in the day.

Member Author:

Well, it turns out that this didn't already work. Sorry for the confusion.

I've fixed this in the latest commit.
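
As a concrete illustration of the label path being discussed, a sketch (the expected output is illustrative):

    import numpy as np
    import pandas as pd

    # Datetime index labels go through NpyArr_encodeLabels; a non-nano
    # index now serializes with its own unit, while datetime.date and
    # timedelta labels keep the nanosecond default (dateUnit = NPY_FR_ns).
    idx = pd.DatetimeIndex(np.array(["2023-01-01T11:22:33"], dtype="datetime64[s]"))
    df = pd.DataFrame({"x": [1]}, index=idx)
    print(df.to_json(date_format="iso", date_unit="s"))
    # expected: {"x":{"2023-01-01T11:22:33Z":1}}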

if (PyTypeNum_ISDATETIME(type_num)) {
is_datetimelike = 1;
PyArray_VectorUnaryFunc *castfunc =
@@ -1303,35 +1313,37 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
"Cannot cast numpy dtype %d to long",
enc->npyType);
}
castfunc(dataptr, &nanosecVal, 1, NULL, NULL);
castfunc(dataptr, &i8date, 1, NULL, NULL);
dateUnit = get_datetime_metadata_from_dtype(dtype).base;
} else if (PyDate_Check(item) || PyDelta_Check(item)) {
is_datetimelike = 1;
if (PyObject_HasAttrString(item, "_value")) {
// see test_date_index_and_values for case with non-nano
nanosecVal = get_long_attr(item, "_value");
i8date = get_long_attr(item, "_value");
} else {
if (PyDelta_Check(item)) {
nanosecVal = total_seconds(item) *
i8date = total_seconds(item) *
1000000000LL; // nanoseconds per second
} else {
// datetime.* objects don't follow above rules
nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns);
i8date = PyDateTimeToEpoch(item, NPY_FR_ns);
}
}
}

if (is_datetimelike) {
if (nanosecVal == get_nat()) {
if (i8date == get_nat()) {
len = 4;
cLabel = PyObject_Malloc(len + 1);
strncpy(cLabel, "null", len + 1);
} else {
if (enc->datetimeIso) {
if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) {
cLabel = int64ToIsoDuration(nanosecVal, &len);
// TODO(username): non-nano timedelta support?
cLabel = int64ToIsoDuration(i8date, &len);
} else {
if (type_num == NPY_DATETIME) {
cLabel = int64ToIso(nanosecVal, base, &len);
cLabel = int64ToIso(i8date, dateUnit, base, &len);
} else {
cLabel = PyDateTimeToIso(item, base, &len);
}
@@ -1346,7 +1358,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
int size_of_cLabel = 21; // 21 chars for int 64
cLabel = PyObject_Malloc(size_of_cLabel);
snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT,
NpyDateTimeToEpoch(nanosecVal, base));
NpyDateTimeToEpoch(i8date, base));
len = strlen(cLabel);
}
}
@@ -1538,13 +1550,24 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
tc->type = JT_UTF8;
return;
} else if (PyArray_IsScalar(obj, Datetime)) {
npy_int64 longVal;
if (((PyDatetimeScalarObject *)obj)->obval == get_nat()) {
tc->type = JT_NULL;
return;
}
PyArray_Descr *dtype = PyArray_DescrFromScalar(obj);
if (dtype->type_num == NPY_OBJECT) {
Member Author:

Wondering if this can be removed. This shouldn't ever fail, but if it does, we'll end up with a nasty segfault.

Member:

https://numpy.org/doc/stable/reference/c-api/array.html#c.PyArray_DescrFromScalar

Reading through that makes me think we are supposed to check that we are dealing with an array scalar before even calling this function.

As far as the resulting comparison to NPY_OBJECT, I think it would be better to explicitly use the PyTypeNum_ISDATETIME macro and error when that is not true; that would be a better and more explicit safeguard against troublesome refactors.

Member Author:

Yeah, this whole block is in a PyArray_IsScalar if block.

PyErr_Format(PyExc_ValueError, "Could not get resolution of datetime");
Member:

Any reason we aren't returning here? It feels a bit off to let execution continue after setting the error.

Member Author:

My bad, updated.

}

PyArray_Descr *outcode = PyArray_DescrFromType(NPY_INT64);
PyArray_CastScalarToCtype(obj, &longVal, outcode);
Py_DECREF(outcode);

if (enc->datetimeIso) {
pc->PyTypeToUTF8 = PyDateTimeToIsoCallback;
GET_TC(tc)->longValue = longVal;
pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback;
enc->valueUnit = get_datetime_metadata_from_dtype(dtype).base;
tc->type = JT_UTF8;
} else {
NPY_DATETIMEUNIT base =
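
To exercise the scalar path changed above from Python, a hedged sketch (the expected output is illustrative):

    import numpy as np
    import pandas as pd

    # An object-dtype column holds np.datetime64 scalars; each one hits
    # Object_beginTypeContext, which now reads the unit from the scalar's
    # descr and routes through NpyDateTimeToIsoCallback.
    df = pd.DataFrame(
        {"when": pd.Series([np.datetime64("2023-06-01T12:00:00", "us")], dtype=object)}
    )
    print(df.to_json(date_format="iso", date_unit="us"))
    # expected: {"when":{"0":"2023-06-01T12:00:00.000000Z"}}
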
39 changes: 39 additions & 0 deletions pandas/tests/io/json/test_pandas.py
@@ -954,6 +954,45 @@ def test_date_unit(self, unit, datetime_frame):
        result = read_json(StringIO(json), date_unit=None)
        tm.assert_frame_equal(result, df)

    @pytest.mark.parametrize("unit", ["s", "ms", "us"])
    def test_iso_non_nano_datetimes(self, unit):
        # GH 53686: numpy datetimes in an Index or a column with non-nano
        # resolution can be serialized correctly
        index = DatetimeIndex(
            [np.datetime64("2023-01-01T11:22:33.123456", unit)],
            dtype=f"datetime64[{unit}]",
        )
        df = DataFrame(
            {
                "date": Series(
                    [np.datetime64("2022-01-01T11:22:33.123456", unit)],
                    dtype=f"datetime64[{unit}]",
                    index=index,
                ),
                "date_obj": Series(
                    [np.datetime64("2023-01-01T11:22:33.123456", unit)],
                    dtype=object,
                    index=index,
                ),
            },
        )

        buf = StringIO()
        df.to_json(buf, date_format="iso", date_unit=unit)
        buf.seek(0)

        # read_json always reads datetimes in nanosecond resolution
        # TODO: check_dtype/check_index_type should be removable
        # once read_json gets non-nano support
        tm.assert_frame_equal(
            read_json(buf, convert_dates=["date", "date_obj"]),
            df,
            check_index_type=False,
            check_dtype=False,
        )

    def test_weird_nested_json(self):
        # this used to core dump the parser
        s = r"""{