From 907baa0cba53bc5a51acdc29c865f511cddd35bb Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 21 Jul 2023 10:30:39 -0700
Subject: [PATCH 1/4] REF: use stdlib json

---
 pandas/io/excel/_odswriter.py  | 5 ++---
 pandas/io/excel/_xlsxwriter.py | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py
index 0bc335a9b75b6..74cbe90acdae8 100644
--- a/pandas/io/excel/_odswriter.py
+++ b/pandas/io/excel/_odswriter.py
@@ -2,6 +2,7 @@
 
 from collections import defaultdict
 import datetime
+import json
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -10,8 +11,6 @@
     overload,
 )
 
-from pandas._libs import json
-
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import (
     combine_kwargs,
@@ -257,7 +256,7 @@ def _process_style(self, style: dict[str, Any] | None) -> str | None:
         if style is None:
             return None
-        style_key = json.ujson_dumps(style)
+        style_key = json.dumps(style)
         if style_key in self._style_dict:
             return self._style_dict[style_key]
         name = f"pd{len(self._style_dict)+1}"
diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py
index afa988a5eda51..6eacac8c064fb 100644
--- a/pandas/io/excel/_xlsxwriter.py
+++ b/pandas/io/excel/_xlsxwriter.py
@@ -1,12 +1,11 @@
 from __future__ import annotations
 
+import json
 from typing import (
     TYPE_CHECKING,
     Any,
 )
 
-from pandas._libs import json
-
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import (
     combine_kwargs,
@@ -262,7 +261,7 @@ def _write_cells(
 
         for cell in cells:
             val, fmt = self._value_with_fmt(cell.val)
 
-            stylekey = json.ujson_dumps(cell.style)
+            stylekey = json.dumps(cell.style)
             if fmt:
                 stylekey += fmt
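Note on PATCH 1/4: both writers serialize the style dict only to obtain a hashable cache key, so any deterministic JSON encoder works here and the vendored ujson_dumps can be swapped for the stdlib without changing behavior. A minimal sketch of the caching pattern (illustrative, not pandas code; names are made up):

    import json

    _style_dict: dict[str, str] = {}

    def style_name(style: dict) -> str:
        # json.dumps preserves insertion order by default; pass sort_keys=True
        # if identical styles could arrive with their keys in a different order.
        style_key = json.dumps(style)
        if style_key in _style_dict:
            return _style_dict[style_key]
        name = f"pd{len(_style_dict) + 1}"
        _style_dict[style_key] = name
        return name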
From 89691eb52b290464a42f7ff6006efc59fa9c029e Mon Sep 17 00:00:00 2001
From: Brock
Date: Fri, 21 Jul 2023 10:30:56 -0700
Subject: [PATCH 2/4] remove unnecessary GET_TC calls

---
 .../pandas/vendored/ujson/lib/ultrajson.h |  3 +++
 .../src/vendored/ujson/python/objToJSON.c | 30 +++++++++++++++---------------
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h b/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h
index 54bcca9e4136c..a72efd580f909 100644
--- a/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h
+++ b/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h
@@ -137,6 +137,9 @@ typedef int64_t JSLONG;
 #error "Endianness not supported"
 #endif
 
+// PANDAS_FORMAT is *not* in the original file this is vendored from
+enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES };
+
 enum JSTYPES {
   JT_NULL,  // NULL
   JT_TRUE,  // boolean true
diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c
index 1fa82215179a8..50a444e710f9b 100644
--- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c
@@ -142,8 +142,6 @@ typedef struct __PyObjectEncoder {
 
 #define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv))
 
-enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES };
-
 int PdBlock_iterNext(JSOBJ, JSONTypeContext *);
 
 static TypeContext *createTypeContext(void) {
@@ -1318,6 +1316,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
     } else if (PyDate_Check(item) || PyDelta_Check(item)) {
       is_datetimelike = 1;
       if (PyObject_HasAttrString(item, "_value")) {
+        // pd.Timestamp object or pd.NaT
         // see test_date_index_and_values for case with non-nano
         i8date = get_long_attr(item, "_value");
       } else {
@@ -1471,12 +1470,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
       }
       // Currently no way to pass longVal to iso function, so use
       // state management
-      GET_TC(tc)->longValue = longVal;
+      pc->longValue = longVal;
       tc->type = JT_UTF8;
     } else {
      NPY_DATETIMEUNIT base =
          ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-      GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base);
+      pc->longValue = NpyDateTimeToEpoch(longVal, base);
       tc->type = JT_LONG;
     }
   }
@@ -1497,9 +1496,9 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
   if (PyLong_Check(obj)) {
     tc->type = JT_LONG;
     int overflow = 0;
-    GET_TC(tc)->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow);
+    pc->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow);
     int err;
-    err = (GET_TC(tc)->longValue == -1) && PyErr_Occurred();
+    err = (pc->longValue == -1) && PyErr_Occurred();
 
     if (overflow) {
       tc->type = JT_BIGNUM;
@@ -1513,7 +1512,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     if (npy_isnan(val) || npy_isinf(val)) {
       tc->type = JT_NULL;
     } else {
-      GET_TC(tc)->doubleValue = val;
+      pc->doubleValue = val;
       tc->type = JT_DOUBLE;
     }
     return;
@@ -1526,7 +1525,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     tc->type = JT_UTF8;
     return;
   } else if (object_is_decimal_type(obj)) {
-    GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj);
+    pc->doubleValue = PyFloat_AsDouble(obj);
     tc->type = JT_DOUBLE;
     return;
   } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
@@ -1541,7 +1540,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     } else {
      NPY_DATETIMEUNIT base =
          ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-      GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
+      pc->longValue = PyDateTimeToEpoch(obj, base);
       tc->type = JT_LONG;
     }
     return;
@@ -1573,12 +1572,13 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     } else {
      NPY_DATETIMEUNIT base =
          ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-      GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
+      pc->longValue = PyDateTimeToEpoch(obj, base);
       tc->type = JT_LONG;
     }
     return;
   } else if (PyDelta_Check(obj)) {
     if (PyObject_HasAttrString(obj, "_value")) {
+      // pd.Timedelta object or pd.NaT
       value = get_long_attr(obj, "_value");
     } else {
       value = total_seconds(obj) * 1000000000LL; // nanoseconds per sec
@@ -1604,11 +1604,11 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
       tc->type = JT_LONG;
     }
 
-    GET_TC(tc)->longValue = value;
+    pc->longValue = value;
     return;
   } else if (PyArray_IsScalar(obj, Integer)) {
     tc->type = JT_LONG;
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
+    PyArray_CastScalarToCtype(obj, &(pc->longValue),
                               PyArray_DescrFromType(NPY_INT64));
 
     exc = PyErr_Occurred();
@@ -1619,12 +1619,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
 
     return;
   } else if (PyArray_IsScalar(obj, Bool)) {
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
+    PyArray_CastScalarToCtype(obj, &(pc->longValue),
                               PyArray_DescrFromType(NPY_BOOL));
-    tc->type = (GET_TC(tc)->longValue) ? JT_TRUE : JT_FALSE;
+    tc->type = (pc->longValue) ? JT_TRUE : JT_FALSE;
     return;
   } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) {
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->doubleValue),
+    PyArray_CastScalarToCtype(obj, &(pc->doubleValue),
                               PyArray_DescrFromType(NPY_DOUBLE));
     tc->type = JT_DOUBLE;
     return;
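Note on PATCH 2/4: besides caching the result of the GET_TC(tc) cast in the local pc pointer, the patch documents the two scalar branches: pandas datetime-like scalars carry an integer nanosecond payload (read from the private _value attribute), while plain datetime.timedelta objects fall back to total_seconds() * 1000000000LL. The same arithmetic through the public Python API, as an illustration that is not part of the patch:

    import datetime

    import pandas as pd

    ts = pd.Timestamp("1970-01-01 00:00:01")
    assert ts.value == 1_000_000_000  # nanoseconds since the epoch

    td = datetime.timedelta(seconds=1)
    # mirrors the C fallback: total_seconds(obj) * 1000000000LL
    assert int(td.total_seconds() * 1_000_000_000) == 1_000_000_000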
From 5dc92168aa6552bbe495102221465987704596bc Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 16 Aug 2023 10:39:57 -0700
Subject: [PATCH 3/4] REF: update dumps->ujson_dumps

---
 pandas/io/json/__init__.py          |  8 ++++----
 pandas/tests/io/json/test_pandas.py | 45 +++++++++++++++++++--------------------------
 2 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py
index ff19cf6e9d4cc..8f4e7a62834b5 100644
--- a/pandas/io/json/__init__.py
+++ b/pandas/io/json/__init__.py
@@ -1,14 +1,14 @@
 from pandas.io.json._json import (
     read_json,
     to_json,
-    ujson_dumps as dumps,
-    ujson_loads as loads,
+    ujson_dumps,
+    ujson_loads,
 )
 from pandas.io.json._table_schema import build_table_schema
 
 __all__ = [
-    "dumps",
-    "loads",
+    "ujson_dumps",
+    "ujson_loads",
     "read_json",
     "to_json",
     "build_table_schema",
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 637d62b98a831..ff9b4acd96499 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -28,6 +28,8 @@
     StringArray,
 )
 
+from pandas.io.json import ujson_dumps
+
 
 def test_literal_json_deprecation():
     # PR 53409
@@ -865,14 +867,13 @@ def test_date_index_and_values(self, date_format, as_object, date_typ):
     )
     def test_convert_dates_infer(self, infer_word):
         # GH10747
-        from pandas.io.json import dumps
 
         data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
         expected = DataFrame(
             [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word]
         )
-        result = read_json(StringIO(dumps(data)))[["id", infer_word]]
+        result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]]
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -1133,8 +1134,6 @@ def test_default_handler(self):
         tm.assert_frame_equal(expected, result, check_index_type=False)
 
     def test_default_handler_indirect(self):
-        from pandas.io.json import dumps
-
         def default(obj):
             if isinstance(obj, complex):
                 return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)]
@@ -1151,7 +1150,9 @@ def default(obj):
             '[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
             '["re",4.0],["im",-5.0]],"N\\/A"]]]'
         )
-        assert dumps(df_list, default_handler=default, orient="values") == expected
+        assert (
+            ujson_dumps(df_list, default_handler=default, orient="values") == expected
+        )
 
     def test_default_handler_numpy_unsupported_dtype(self):
         # GH12554 to_json raises 'Unhandled numpy dtype 15'
@@ -1235,23 +1236,19 @@ def test_sparse(self):
         ],
     )
     def test_tz_is_utc(self, ts):
-        from pandas.io.json import dumps
-
         exp = '"2013-01-10T05:00:00.000Z"'
 
-        assert dumps(ts, iso_dates=True) == exp
+        assert ujson_dumps(ts, iso_dates=True) == exp
         dt = ts.to_pydatetime()
-        assert dumps(dt, iso_dates=True) == exp
+        assert ujson_dumps(dt, iso_dates=True) == exp
 
     def test_tz_is_naive(self):
-        from pandas.io.json import dumps
-
         ts = Timestamp("2013-01-10 05:00:00")
         exp = '"2013-01-10T05:00:00.000"'
 
-        assert dumps(ts, iso_dates=True) == exp
+        assert ujson_dumps(ts, iso_dates=True) == exp
         dt = ts.to_pydatetime()
-        assert dumps(dt, iso_dates=True) == exp
+        assert ujson_dumps(dt, iso_dates=True) == exp
 
     @pytest.mark.parametrize(
         "tz_range",
@@ -1258,13 +1255,11 @@ def test_tz_is_naive(self):
         [
             pd.date_range("2013-01-01 05:00:00Z", periods=2),
             pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"),
            pd.date_range("2013-01-01 00:00:00-0500", periods=2),
         ],
     )
     def test_tz_range_is_utc(self, tz_range):
-        from pandas.io.json import dumps
-
         exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
         dfexp = (
             '{"DT":{'
             '"0":"2013-01-01T05:00:00.000Z",'
@@ -1271,20 +1266,18 @@ def test_tz_range_is_utc(self, tz_range):
             '"1":"2013-01-02T05:00:00.000Z"}}'
         )
 
-        assert dumps(tz_range, iso_dates=True) == exp
+        assert ujson_dumps(tz_range, iso_dates=True) == exp
 
         dti = DatetimeIndex(tz_range)
         # Ensure datetimes in object array are serialized correctly
         # in addition to the normal DTI case
-        assert dumps(dti, iso_dates=True) == exp
-        assert dumps(dti.astype(object), iso_dates=True) == exp
+        assert ujson_dumps(dti, iso_dates=True) == exp
+        assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
 
         df = DataFrame({"DT": dti})
-        result = dumps(df, iso_dates=True)
+        result = ujson_dumps(df, iso_dates=True)
         assert result == dfexp
-        assert dumps(df.astype({"DT": object}), iso_dates=True)
+        assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
 
     def test_tz_range_is_naive(self):
-        from pandas.io.json import dumps
-
         dti = pd.date_range("2013-01-01 05:00:00", periods=2)
@@ -1292,12 +1285,12 @@ def test_tz_range_is_naive(self):
         exp = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
 
         # Ensure datetimes in object array are serialized correctly
         # in addition to the normal DTI case
-        assert dumps(dti, iso_dates=True) == exp
-        assert dumps(dti.astype(object), iso_dates=True) == exp
+        assert ujson_dumps(dti, iso_dates=True) == exp
+        assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
 
         df = DataFrame({"DT": dti})
-        result = dumps(df, iso_dates=True)
+        result = ujson_dumps(df, iso_dates=True)
         assert result == dfexp
-        assert dumps(df.astype({"DT": object}), iso_dates=True)
+        assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
 
     def test_read_inline_jsonl(self):
         # GH9180
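Note on PATCH 3/4: with the dumps/loads aliases gone, test code imports the private helpers under their real names once at module level instead of inside each test. A hedged example of the updated import pattern (pandas.io.json is not public API; this mirrors the test usage above rather than documented behavior):

    from io import StringIO

    from pandas import read_json
    from pandas.io.json import ujson_dumps  # formerly re-exported as `dumps`

    data = [{"id": 1, "created_at": 1036713600000}, {"id": 2}]
    df = read_json(StringIO(ujson_dumps(data)))
    print(df)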
From 8339599ad493d2675533da62b5434f1a337b7d29 Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 16 Aug 2023 14:41:46 -0700
Subject: [PATCH 4/4] revert enum move

---
 pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h | 3 ---
 pandas/_libs/src/vendored/ujson/python/objToJSON.c         | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h b/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h
index a72efd580f909..54bcca9e4136c 100644
--- a/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h
+++ b/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h
@@ -137,9 +137,6 @@ typedef int64_t JSLONG;
 #error "Endianness not supported"
 #endif
 
-// PANDAS_FORMAT is *not* in the original file this is vendored from
-enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES };
-
 enum JSTYPES {
   JT_NULL,  // NULL
   JT_TRUE,  // boolean true
diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c
index 50a444e710f9b..4a22de886742c 100644
--- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c
@@ -142,6 +142,8 @@ typedef struct __PyObjectEncoder {
 
 #define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv))
 
+enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES };
+
 int PdBlock_iterNext(JSOBJ, JSONTypeContext *);
 
 static TypeContext *createTypeContext(void) {
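Note on PATCH 4/4: the revert keeps the pandas-specific enum out of the vendored header, so ultrajson.h stays closer to the upstream file it is vendored from, and the enum lives in the pandas-owned objToJSON.c instead. For context, the enum's values line up with the orient options accepted by DataFrame.to_json (an illustration, not part of the patch):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2]}, index=["x", "y"])
    for orient in ("split", "records", "index", "columns", "values"):
        print(orient, df.to_json(orient=orient))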