Skip to content

Commit 458e235

Browse files
jbrockmendel authored and mroeschke committed
REF: ujson cleanups (pandas-dev#54581)
* REF: use stdlib json
* remove unnecessary GET_TC calls
* REF: update dumps -> ujson_dumps
* revert enum move
1 parent 3cabf00 commit 458e235

File tree

5 files changed

+42
-49
lines changed

5 files changed

+42
-49
lines changed

pandas/_libs/src/vendored/ujson/python/objToJSON.c

+15-13
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
13181318
} else if (PyDate_Check(item) || PyDelta_Check(item)) {
13191319
is_datetimelike = 1;
13201320
if (PyObject_HasAttrString(item, "_value")) {
1321+
// pd.Timestamp object or pd.NaT
13211322
// see test_date_index_and_values for case with non-nano
13221323
i8date = get_long_attr(item, "_value");
13231324
} else {
@@ -1471,12 +1472,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
14711472
}
14721473
// Currently no way to pass longVal to iso function, so use
14731474
// state management
1474-
GET_TC(tc)->longValue = longVal;
1475+
pc->longValue = longVal;
14751476
tc->type = JT_UTF8;
14761477
} else {
14771478
NPY_DATETIMEUNIT base =
14781479
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1479-
GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base);
1480+
pc->longValue = NpyDateTimeToEpoch(longVal, base);
14801481
tc->type = JT_LONG;
14811482
}
14821483
}
@@ -1497,9 +1498,9 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
14971498
if (PyLong_Check(obj)) {
14981499
tc->type = JT_LONG;
14991500
int overflow = 0;
1500-
GET_TC(tc)->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow);
1501+
pc->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow);
15011502
int err;
1502-
err = (GET_TC(tc)->longValue == -1) && PyErr_Occurred();
1503+
err = (pc->longValue == -1) && PyErr_Occurred();
15031504

15041505
if (overflow) {
15051506
tc->type = JT_BIGNUM;
@@ -1513,7 +1514,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
15131514
if (npy_isnan(val) || npy_isinf(val)) {
15141515
tc->type = JT_NULL;
15151516
} else {
1516-
GET_TC(tc)->doubleValue = val;
1517+
pc->doubleValue = val;
15171518
tc->type = JT_DOUBLE;
15181519
}
15191520
return;
@@ -1526,7 +1527,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
15261527
tc->type = JT_UTF8;
15271528
return;
15281529
} else if (object_is_decimal_type(obj)) {
1529-
GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj);
1530+
pc->doubleValue = PyFloat_AsDouble(obj);
15301531
tc->type = JT_DOUBLE;
15311532
return;
15321533
} else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
@@ -1541,7 +1542,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
15411542
} else {
15421543
NPY_DATETIMEUNIT base =
15431544
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1544-
GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
1545+
pc->longValue = PyDateTimeToEpoch(obj, base);
15451546
tc->type = JT_LONG;
15461547
}
15471548
return;
@@ -1573,12 +1574,13 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
15731574
} else {
15741575
NPY_DATETIMEUNIT base =
15751576
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
1576-
GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
1577+
pc->longValue = PyDateTimeToEpoch(obj, base);
15771578
tc->type = JT_LONG;
15781579
}
15791580
return;
15801581
} else if (PyDelta_Check(obj)) {
15811582
if (PyObject_HasAttrString(obj, "_value")) {
1583+
// pd.Timedelta object or pd.NaT
15821584
value = get_long_attr(obj, "_value");
15831585
} else {
15841586
value = total_seconds(obj) * 1000000000LL; // nanoseconds per sec
@@ -1604,11 +1606,11 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
16041606

16051607
tc->type = JT_LONG;
16061608
}
1607-
GET_TC(tc)->longValue = value;
1609+
pc->longValue = value;
16081610
return;
16091611
} else if (PyArray_IsScalar(obj, Integer)) {
16101612
tc->type = JT_LONG;
1611-
PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
1613+
PyArray_CastScalarToCtype(obj, &(pc->longValue),
16121614
PyArray_DescrFromType(NPY_INT64));
16131615

16141616
exc = PyErr_Occurred();
@@ -1619,12 +1621,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
16191621

16201622
return;
16211623
} else if (PyArray_IsScalar(obj, Bool)) {
1622-
PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
1624+
PyArray_CastScalarToCtype(obj, &(pc->longValue),
16231625
PyArray_DescrFromType(NPY_BOOL));
1624-
tc->type = (GET_TC(tc)->longValue) ? JT_TRUE : JT_FALSE;
1626+
tc->type = (pc->longValue) ? JT_TRUE : JT_FALSE;
16251627
return;
16261628
} else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) {
1627-
PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->doubleValue),
1629+
PyArray_CastScalarToCtype(obj, &(pc->doubleValue),
16281630
PyArray_DescrFromType(NPY_DOUBLE));
16291631
tc->type = JT_DOUBLE;
16301632
return;

pandas/io/excel/_odswriter.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from collections import defaultdict
44
import datetime
5+
import json
56
from typing import (
67
TYPE_CHECKING,
78
Any,
@@ -10,8 +11,6 @@
1011
overload,
1112
)
1213

13-
from pandas._libs import json
14-
1514
from pandas.io.excel._base import ExcelWriter
1615
from pandas.io.excel._util import (
1716
combine_kwargs,
@@ -257,7 +256,7 @@ def _process_style(self, style: dict[str, Any] | None) -> str | None:
257256

258257
if style is None:
259258
return None
260-
style_key = json.ujson_dumps(style)
259+
style_key = json.dumps(style)
261260
if style_key in self._style_dict:
262261
return self._style_dict[style_key]
263262
name = f"pd{len(self._style_dict)+1}"

pandas/io/excel/_xlsxwriter.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
from __future__ import annotations
22

3+
import json
34
from typing import (
45
TYPE_CHECKING,
56
Any,
67
)
78

8-
from pandas._libs import json
9-
109
from pandas.io.excel._base import ExcelWriter
1110
from pandas.io.excel._util import (
1211
combine_kwargs,
@@ -262,7 +261,7 @@ def _write_cells(
262261
for cell in cells:
263262
val, fmt = self._value_with_fmt(cell.val)
264263

265-
stylekey = json.ujson_dumps(cell.style)
264+
stylekey = json.dumps(cell.style)
266265
if fmt:
267266
stylekey += fmt
268267

pandas/io/json/__init__.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
from pandas.io.json._json import (
22
read_json,
33
to_json,
4-
ujson_dumps as dumps,
5-
ujson_loads as loads,
4+
ujson_dumps,
5+
ujson_loads,
66
)
77
from pandas.io.json._table_schema import build_table_schema
88

99
__all__ = [
10-
"dumps",
11-
"loads",
10+
"ujson_dumps",
11+
"ujson_loads",
1212
"read_json",
1313
"to_json",
1414
"build_table_schema",

pandas/tests/io/json/test_pandas.py

+19-26
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
StringArray,
2929
)
3030

31+
from pandas.io.json import ujson_dumps
32+
3133

3234
def test_literal_json_deprecation():
3335
# PR 53409
@@ -865,14 +867,13 @@ def test_date_index_and_values(self, date_format, as_object, date_typ):
865867
)
866868
def test_convert_dates_infer(self, infer_word):
867869
# GH10747
868-
from pandas.io.json import dumps
869870

870871
data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
871872
expected = DataFrame(
872873
[[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word]
873874
)
874875

875-
result = read_json(StringIO(dumps(data)))[["id", infer_word]]
876+
result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]]
876877
tm.assert_frame_equal(result, expected)
877878

878879
@pytest.mark.parametrize(
@@ -1133,8 +1134,6 @@ def test_default_handler(self):
11331134
tm.assert_frame_equal(expected, result, check_index_type=False)
11341135

11351136
def test_default_handler_indirect(self):
1136-
from pandas.io.json import dumps
1137-
11381137
def default(obj):
11391138
if isinstance(obj, complex):
11401139
return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)]
@@ -1151,7 +1150,9 @@ def default(obj):
11511150
'[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
11521151
'["re",4.0],["im",-5.0]],"N\\/A"]]]'
11531152
)
1154-
assert dumps(df_list, default_handler=default, orient="values") == expected
1153+
assert (
1154+
ujson_dumps(df_list, default_handler=default, orient="values") == expected
1155+
)
11551156

11561157
def test_default_handler_numpy_unsupported_dtype(self):
11571158
# GH12554 to_json raises 'Unhandled numpy dtype 15'
@@ -1235,23 +1236,19 @@ def test_sparse(self):
12351236
],
12361237
)
12371238
def test_tz_is_utc(self, ts):
1238-
from pandas.io.json import dumps
1239-
12401239
exp = '"2013-01-10T05:00:00.000Z"'
12411240

1242-
assert dumps(ts, iso_dates=True) == exp
1241+
assert ujson_dumps(ts, iso_dates=True) == exp
12431242
dt = ts.to_pydatetime()
1244-
assert dumps(dt, iso_dates=True) == exp
1243+
assert ujson_dumps(dt, iso_dates=True) == exp
12451244

12461245
def test_tz_is_naive(self):
1247-
from pandas.io.json import dumps
1248-
12491246
ts = Timestamp("2013-01-10 05:00:00")
12501247
exp = '"2013-01-10T05:00:00.000"'
12511248

1252-
assert dumps(ts, iso_dates=True) == exp
1249+
assert ujson_dumps(ts, iso_dates=True) == exp
12531250
dt = ts.to_pydatetime()
1254-
assert dumps(dt, iso_dates=True) == exp
1251+
assert ujson_dumps(dt, iso_dates=True) == exp
12551252

12561253
@pytest.mark.parametrize(
12571254
"tz_range",
@@ -1262,42 +1259,38 @@ def test_tz_is_naive(self):
12621259
],
12631260
)
12641261
def test_tz_range_is_utc(self, tz_range):
1265-
from pandas.io.json import dumps
1266-
12671262
exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
12681263
dfexp = (
12691264
'{"DT":{'
12701265
'"0":"2013-01-01T05:00:00.000Z",'
12711266
'"1":"2013-01-02T05:00:00.000Z"}}'
12721267
)
12731268

1274-
assert dumps(tz_range, iso_dates=True) == exp
1269+
assert ujson_dumps(tz_range, iso_dates=True) == exp
12751270
dti = DatetimeIndex(tz_range)
12761271
# Ensure datetimes in object array are serialized correctly
12771272
# in addition to the normal DTI case
1278-
assert dumps(dti, iso_dates=True) == exp
1279-
assert dumps(dti.astype(object), iso_dates=True) == exp
1273+
assert ujson_dumps(dti, iso_dates=True) == exp
1274+
assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
12801275
df = DataFrame({"DT": dti})
1281-
result = dumps(df, iso_dates=True)
1276+
result = ujson_dumps(df, iso_dates=True)
12821277
assert result == dfexp
1283-
assert dumps(df.astype({"DT": object}), iso_dates=True)
1278+
assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
12841279

12851280
def test_tz_range_is_naive(self):
1286-
from pandas.io.json import dumps
1287-
12881281
dti = pd.date_range("2013-01-01 05:00:00", periods=2)
12891282

12901283
exp = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
12911284
dfexp = '{"DT":{"0":"2013-01-01T05:00:00.000","1":"2013-01-02T05:00:00.000"}}'
12921285

12931286
# Ensure datetimes in object array are serialized correctly
12941287
# in addition to the normal DTI case
1295-
assert dumps(dti, iso_dates=True) == exp
1296-
assert dumps(dti.astype(object), iso_dates=True) == exp
1288+
assert ujson_dumps(dti, iso_dates=True) == exp
1289+
assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
12971290
df = DataFrame({"DT": dti})
1298-
result = dumps(df, iso_dates=True)
1291+
result = ujson_dumps(df, iso_dates=True)
12991292
assert result == dfexp
1300-
assert dumps(df.astype({"DT": object}), iso_dates=True)
1293+
assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
13011294

13021295
def test_read_inline_jsonl(self):
13031296
# GH9180

0 commit comments

Comments
 (0)