Skip to content

Commit ef23fc7

Browse files
authored
Fix memory leak with ujson module (#49466)
* Fix memory leak with ujson module * fixups * Whatsnew
1 parent 8b6b867 commit ef23fc7

File tree

3 files changed

+398
-63
lines changed

3 files changed

+398
-63
lines changed

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ I/O
568568
- Improved error message in :func:`read_excel` by including the offending sheet name when an exception is raised while reading a file (:issue:`48706`)
569569
- Bug when pickling a subset of PyArrow-backed data that would serialize the entire data instead of the subset (:issue:`42600`)
570570
- Bug in :func:`read_csv` where a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
571-
-
571+
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
572572

573573
Period
574574
^^^^^^

pandas/_libs/src/ujson/python/objToJSON.c

+19-54
Original file line numberDiff line numberDiff line change
@@ -50,19 +50,18 @@ Numeric decoder derived from TCL library
5050
#include "date_conversions.h"
5151
#include "datetime.h"
5252

53-
static PyTypeObject *type_decimal;
54-
static PyTypeObject *cls_dataframe;
55-
static PyTypeObject *cls_series;
56-
static PyTypeObject *cls_index;
57-
static PyTypeObject *cls_nat;
58-
static PyTypeObject *cls_na;
59-
PyObject *cls_timedelta;
60-
6153
npy_int64 get_nat(void) { return NPY_MIN_INT64; }
6254

6355
typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
6456
size_t *_outLen);
6557

58+
int object_is_decimal_type(PyObject *obj);
59+
int object_is_dataframe_type(PyObject *obj);
60+
int object_is_series_type(PyObject *obj);
61+
int object_is_index_type(PyObject *obj);
62+
int object_is_nat_type(PyObject *obj);
63+
int object_is_na_type(PyObject *obj);
64+
6665
typedef struct __NpyArrContext {
6766
PyObject *array;
6867
char *dataptr;
@@ -146,44 +145,6 @@ enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES };
146145

147146
int PdBlock_iterNext(JSOBJ, JSONTypeContext *);
148147

149-
void *initObjToJSON(void) {
150-
PyObject *mod_pandas;
151-
PyObject *mod_nattype;
152-
PyObject *mod_natype;
153-
PyObject *mod_decimal = PyImport_ImportModule("decimal");
154-
type_decimal =
155-
(PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal");
156-
Py_DECREF(mod_decimal);
157-
158-
PyDateTime_IMPORT;
159-
160-
mod_pandas = PyImport_ImportModule("pandas");
161-
if (mod_pandas) {
162-
cls_dataframe =
163-
(PyTypeObject *)PyObject_GetAttrString(mod_pandas, "DataFrame");
164-
cls_index = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Index");
165-
cls_series =
166-
(PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series");
167-
Py_DECREF(mod_pandas);
168-
}
169-
170-
mod_nattype = PyImport_ImportModule("pandas._libs.tslibs.nattype");
171-
if (mod_nattype) {
172-
cls_nat =
173-
(PyTypeObject *)PyObject_GetAttrString(mod_nattype, "NaTType");
174-
Py_DECREF(mod_nattype);
175-
}
176-
177-
mod_natype = PyImport_ImportModule("pandas._libs.missing");
178-
if (mod_natype) {
179-
cls_na = (PyTypeObject *)PyObject_GetAttrString(mod_natype, "NAType");
180-
Py_DECREF(mod_natype);
181-
}
182-
183-
// GH 31463
184-
return NULL;
185-
}
186-
187148
static TypeContext *createTypeContext(void) {
188149
TypeContext *pc;
189150

@@ -216,8 +177,7 @@ static TypeContext *createTypeContext(void) {
216177
static PyObject *get_values(PyObject *obj) {
217178
PyObject *values = NULL;
218179

219-
if (PyObject_TypeCheck(obj, cls_index) ||
220-
PyObject_TypeCheck(obj, cls_series)) {
180+
if (object_is_index_type(obj) || object_is_series_type(obj)) {
221181
// The special cases to worry about are dt64tz and category[dt64tz].
222182
// In both cases we want the UTC-localized datetime64 ndarray,
223183
// without going through an object array of Timestamps.
@@ -1510,12 +1470,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
15101470
pc->PyTypeToUTF8 = PyUnicodeToUTF8;
15111471
tc->type = JT_UTF8;
15121472
return;
1513-
} else if (PyObject_TypeCheck(obj, type_decimal)) {
1473+
} else if (object_is_decimal_type(obj)) {
15141474
GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj);
15151475
tc->type = JT_DOUBLE;
15161476
return;
15171477
} else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
1518-
if (PyObject_TypeCheck(obj, cls_nat)) {
1478+
if (object_is_nat_type(obj)) {
15191479
tc->type = JT_NULL;
15201480
return;
15211481
}
@@ -1606,14 +1566,14 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
16061566
"%R (0d array) is not JSON serializable at the moment",
16071567
obj);
16081568
goto INVALID;
1609-
} else if (PyObject_TypeCheck(obj, cls_na)) {
1569+
} else if (object_is_na_type(obj)) {
16101570
tc->type = JT_NULL;
16111571
return;
16121572
}
16131573

16141574
ISITERABLE:
16151575

1616-
if (PyObject_TypeCheck(obj, cls_index)) {
1576+
if (object_is_index_type(obj)) {
16171577
if (enc->outputFormat == SPLIT) {
16181578
tc->type = JT_OBJECT;
16191579
pc->iterBegin = Index_iterBegin;
@@ -1637,7 +1597,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
16371597
}
16381598

16391599
return;
1640-
} else if (PyObject_TypeCheck(obj, cls_series)) {
1600+
} else if (object_is_series_type(obj)) {
16411601
if (enc->outputFormat == SPLIT) {
16421602
tc->type = JT_OBJECT;
16431603
pc->iterBegin = Series_iterBegin;
@@ -1701,7 +1661,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
17011661
pc->iterGetValue = NpyArr_iterGetValue;
17021662
pc->iterGetName = NpyArr_iterGetName;
17031663
return;
1704-
} else if (PyObject_TypeCheck(obj, cls_dataframe)) {
1664+
} else if (object_is_dataframe_type(obj)) {
17051665
if (enc->blkCtxtPassthru) {
17061666
pc->pdblock = enc->blkCtxtPassthru;
17071667
tc->type =
@@ -1969,6 +1929,11 @@ char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) {
19691929

19701930
PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args,
19711931
PyObject *kwargs) {
1932+
PyDateTime_IMPORT;
1933+
if (PyDateTimeAPI == NULL) {
1934+
return NULL;
1935+
}
1936+
19721937
static char *kwlist[] = {"obj",
19731938
"ensure_ascii",
19741939
"double_precision",

0 commit comments

Comments
 (0)