Skip to content

Commit 817b706

Browse files
authored
ENH: Format decimal.Decimal as full precision strings in .to_json(...) (#60698)
* Format decimal.Decimal as full precision strings in .to_json(...) * Fix failing tests * Clean up Decimal to UTF-8 conversion and switch to using PyObject_Format() to suppress scientific notation * Add whatsnew entry
1 parent 8bc8c0a commit 817b706

File tree

5 files changed

+52
-25
lines changed

5 files changed

+52
-25
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Other enhancements
5353
- :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`)
5454
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
5555
- :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
56+
- :meth:`DataFrame.to_json` now encodes ``Decimal`` as strings instead of floats (:issue:`60698`)
5657
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5758
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
5859
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)

pandas/_libs/src/vendored/ujson/python/objToJSON.c

+33-2
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,27 @@ static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) {
373373
return outValue;
374374
}
375375

376+
static char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc,
377+
size_t *len) {
378+
PyObject *obj = (PyObject *)_obj;
379+
PyObject *format_spec = PyUnicode_FromStringAndSize("f", 1);
380+
PyObject *str = PyObject_Format(obj, format_spec);
381+
Py_DECREF(format_spec);
382+
383+
if (str == NULL) {
384+
((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
385+
return NULL;
386+
}
387+
388+
GET_TC(tc)->newObj = str;
389+
390+
Py_ssize_t s_len;
391+
char *outValue = (char *)PyUnicode_AsUTF8AndSize(str, &s_len);
392+
*len = s_len;
393+
394+
return outValue;
395+
}
396+
376397
//=============================================================================
377398
// Numpy array iteration functions
378399
//=============================================================================
@@ -1467,8 +1488,18 @@ static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
14671488
tc->type = JT_UTF8;
14681489
return;
14691490
} else if (object_is_decimal_type(obj)) {
1470-
pc->doubleValue = PyFloat_AsDouble(obj);
1471-
tc->type = JT_DOUBLE;
1491+
PyObject *is_nan_py = PyObject_RichCompare(obj, obj, Py_NE);
1492+
if (is_nan_py == NULL) {
1493+
goto INVALID;
1494+
}
1495+
int is_nan = (is_nan_py == Py_True);
1496+
Py_DECREF(is_nan_py);
1497+
if (is_nan) {
1498+
tc->type = JT_NULL;
1499+
return;
1500+
}
1501+
pc->PyTypeToUTF8 = PyDecimalToUTF8Callback;
1502+
tc->type = JT_UTF8;
14721503
return;
14731504
} else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
14741505
if (object_is_nat_type(obj)) {

pandas/tests/io/json/test_json_table_schema_ext_dtype.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def test_build_decimal_series(self, dc):
159159
expected = OrderedDict(
160160
[
161161
("schema", schema),
162-
("data", [OrderedDict([("id", 0), ("a", 10.0)])]),
162+
("data", [OrderedDict([("id", 0), ("a", "10")])]),
163163
]
164164
)
165165

@@ -245,7 +245,7 @@ def test_to_json(self, da, dc, sa, ia):
245245
[
246246
("idx", 0),
247247
("A", "2021-10-10T00:00:00.000"),
248-
("B", 10.0),
248+
("B", "10"),
249249
("C", "pandas"),
250250
("D", 10),
251251
]

pandas/tests/io/json/test_pandas.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import datetime
22
from datetime import timedelta
3-
from decimal import Decimal
43
from io import StringIO
54
import json
65
import os
@@ -2025,12 +2024,8 @@ def test_to_s3(self, s3_public_bucket, s3so):
20252024
timeout -= 0.1
20262025
assert timeout > 0, "Timed out waiting for file to appear on moto"
20272026

2028-
def test_json_pandas_nulls(self, nulls_fixture, request):
2027+
def test_json_pandas_nulls(self, nulls_fixture):
20292028
# GH 31615
2030-
if isinstance(nulls_fixture, Decimal):
2031-
mark = pytest.mark.xfail(reason="not implemented")
2032-
request.applymarker(mark)
2033-
20342029
expected_warning = None
20352030
msg = (
20362031
"The default 'epoch' date format is deprecated and will be removed "

pandas/tests/io/json/test_ujson.py

+15-15
Original file line numberDiff line numberDiff line change
@@ -57,56 +57,56 @@ def test_encode_decimal(self):
5757
sut = decimal.Decimal("1337.1337")
5858
encoded = ujson.ujson_dumps(sut, double_precision=15)
5959
decoded = ujson.ujson_loads(encoded)
60-
assert decoded == 1337.1337
60+
assert decoded == "1337.1337"
6161

6262
sut = decimal.Decimal("0.95")
6363
encoded = ujson.ujson_dumps(sut, double_precision=1)
64-
assert encoded == "1.0"
64+
assert encoded == '"0.95"'
6565

6666
decoded = ujson.ujson_loads(encoded)
67-
assert decoded == 1.0
67+
assert decoded == "0.95"
6868

6969
sut = decimal.Decimal("0.94")
7070
encoded = ujson.ujson_dumps(sut, double_precision=1)
71-
assert encoded == "0.9"
71+
assert encoded == '"0.94"'
7272

7373
decoded = ujson.ujson_loads(encoded)
74-
assert decoded == 0.9
74+
assert decoded == "0.94"
7575

7676
sut = decimal.Decimal("1.95")
7777
encoded = ujson.ujson_dumps(sut, double_precision=1)
78-
assert encoded == "2.0"
78+
assert encoded == '"1.95"'
7979

8080
decoded = ujson.ujson_loads(encoded)
81-
assert decoded == 2.0
81+
assert decoded == "1.95"
8282

8383
sut = decimal.Decimal("-1.95")
8484
encoded = ujson.ujson_dumps(sut, double_precision=1)
85-
assert encoded == "-2.0"
85+
assert encoded == '"-1.95"'
8686

8787
decoded = ujson.ujson_loads(encoded)
88-
assert decoded == -2.0
88+
assert decoded == "-1.95"
8989

9090
sut = decimal.Decimal("0.995")
9191
encoded = ujson.ujson_dumps(sut, double_precision=2)
92-
assert encoded == "1.0"
92+
assert encoded == '"0.995"'
9393

9494
decoded = ujson.ujson_loads(encoded)
95-
assert decoded == 1.0
95+
assert decoded == "0.995"
9696

9797
sut = decimal.Decimal("0.9995")
9898
encoded = ujson.ujson_dumps(sut, double_precision=3)
99-
assert encoded == "1.0"
99+
assert encoded == '"0.9995"'
100100

101101
decoded = ujson.ujson_loads(encoded)
102-
assert decoded == 1.0
102+
assert decoded == "0.9995"
103103

104104
sut = decimal.Decimal("0.99999999999999944")
105105
encoded = ujson.ujson_dumps(sut, double_precision=15)
106-
assert encoded == "1.0"
106+
assert encoded == '"0.99999999999999944"'
107107

108108
decoded = ujson.ujson_loads(encoded)
109-
assert decoded == 1.0
109+
assert decoded == "0.99999999999999944"
110110

111111
@pytest.mark.parametrize("ensure_ascii", [True, False])
112112
def test_encode_string_conversion(self, ensure_ascii):

0 commit comments

Comments
 (0)