-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Implement C Level Timedelta ISO Function; fix JSON usage #30903
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 25 commits
Commits
Show all changes
39 commits
Select commit
Hold shift + click to select a range
48d2cf0
Working C impl of timedelta ISO
WillAyd aecc34d
Merge remote-tracking branch 'upstream/master' into json-timedelta
WillAyd 9d0384b
More consistent impl
WillAyd fd9675e
shared between python / numpy timedelta
WillAyd 33fc37b
Shared td handling code
WillAyd e2e9995
added tests from cbertinato
WillAyd a2cbd85
shared code
WillAyd 88974b5
working tests
WillAyd 65a727f
reformat
WillAyd 5d84cc3
Expanded test coverage
WillAyd 5445333
fixed test
WillAyd 191d219
better null handling
WillAyd 66a2a43
expanded test
WillAyd 60b5537
removed print
WillAyd dae5336
refactor
WillAyd 88df8bf
fix incorrect test
WillAyd 4146d9f
refactor
WillAyd 24a7910
reformat
WillAyd b1e7da0
more date testing
WillAyd 0046c3c
refactored with bug fix
WillAyd 77b7bae
simplified timedelta test
WillAyd 3ef4aff
Added timedelta coverage
WillAyd 960dce6
stylistic updates
WillAyd 6d2c8da
Removed unneeded timedelta import
WillAyd 9b431f2
Merge remote-tracking branch 'upstream/master' into json-timedelta
WillAyd 4a94f15
style updates
WillAyd 40468bf
replace sprintf with snprintf
WillAyd 0259370
ignore lint errors
WillAyd d1c00e5
Update test_pandas.py
WillAyd 9efb929
Merge remote-tracking branch 'upstream/master' into json-timedelta
WillAyd 9cbc075
Merge remote-tracking branch 'upstream/master' into json-timedelta
WillAyd 29f497f
moved conversion func
WillAyd 35d4a4b
fix note
WillAyd bd2c4db
Merge remote-tracking branch 'upstream/master' into json-timedelta
WillAyd a8423d0
Merge remote-tracking branch 'upstream/master' into json-timedelta
WillAyd ebe58c7
Whatsnew
WillAyd d8f7575
Merge remote-tracking branch 'upstream/master' into json-timedelta
WillAyd 8486e37
Merge remote-tracking branch 'upstream/master' into json-timedelta
WillAyd ef08ad6
more comprehensive testing
WillAyd File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,7 +54,6 @@ static PyTypeObject *cls_dataframe; | |
static PyTypeObject *cls_series; | ||
static PyTypeObject *cls_index; | ||
static PyTypeObject *cls_nat; | ||
PyObject *cls_timedelta; | ||
|
||
npy_int64 get_nat(void) { return NPY_MIN_INT64; } | ||
|
||
|
@@ -165,7 +164,6 @@ void *initObjToJSON(void) { | |
cls_index = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Index"); | ||
cls_series = | ||
(PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series"); | ||
cls_timedelta = PyObject_GetAttrString(mod_pandas, "Timedelta"); | ||
Py_DECREF(mod_pandas); | ||
} | ||
|
||
|
@@ -399,6 +397,7 @@ static char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { | |
PyErr_SetString(PyExc_ValueError, | ||
"Could not convert datetime value to string"); | ||
PyObject_Free(result); | ||
return NULL; | ||
} | ||
|
||
// Note that get_datetime_iso_8601_strlen just gives a generic size | ||
|
@@ -407,6 +406,30 @@ static char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { | |
return result; | ||
} | ||
|
||
/* Converts the int64_t representation of a duration to ISO; mutates len */ | ||
static char *int64ToIsoDuration(int64_t value, size_t *len) { | ||
pandas_timedeltastruct tds; | ||
int ret_code; | ||
|
||
pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); | ||
|
||
char *result = PyObject_Malloc(100); // TODO: Better bounds | ||
if (result == NULL) { | ||
PyErr_NoMemory(); | ||
return NULL; | ||
} | ||
|
||
ret_code = make_iso_8601_timedelta(&tds, result, len); | ||
if (ret_code == -1) { | ||
PyErr_SetString(PyExc_ValueError, | ||
"Could not convert timedelta value to string"); | ||
PyObject_Free(result); | ||
return NULL; | ||
} | ||
|
||
return result; | ||
} | ||
|
||
/* JSON callback. returns a char* and mutates the pointer to *len */ | ||
static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), | ||
JSONTypeContext *tc, size_t *len) { | ||
|
@@ -419,6 +442,12 @@ static npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { | |
return dt; | ||
} | ||
|
||
/* JSON callback. returns a char* and mutates the pointer to *len */ | ||
static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused), | ||
JSONTypeContext *tc, size_t *len) { | ||
return int64ToIsoDuration(GET_TC(tc)->longValue, len); | ||
} | ||
|
||
/* Convert PyDatetime To ISO C-string. mutates len */ | ||
static char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, | ||
size_t *len) { | ||
|
@@ -1504,6 +1533,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
char **ret; | ||
char *dataptr, *cLabel; | ||
int type_num; | ||
NPY_DATETIMEUNIT base = enc->datetimeUnit; | ||
PRINTMARK(); | ||
|
||
if (!labels) { | ||
|
@@ -1541,60 +1571,64 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
break; | ||
} | ||
|
||
// TODO: vectorized timedelta solution | ||
if (enc->datetimeIso && | ||
(type_num == NPY_TIMEDELTA || PyDelta_Check(item))) { | ||
PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item); | ||
if (td == NULL) { | ||
Py_DECREF(item); | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
break; | ||
} | ||
|
||
PyObject *iso = PyObject_CallMethod(td, "isoformat", NULL); | ||
Py_DECREF(td); | ||
if (iso == NULL) { | ||
Py_DECREF(item); | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
break; | ||
} | ||
|
||
cLabel = (char *)PyUnicode_AsUTF8(iso); | ||
Py_DECREF(iso); | ||
len = strlen(cLabel); | ||
} else if (PyTypeNum_ISDATETIME(type_num)) { | ||
NPY_DATETIMEUNIT base = enc->datetimeUnit; | ||
npy_int64 longVal; | ||
int is_datetimelike = 0; | ||
npy_int64 nanosecVal; | ||
if (PyTypeNum_ISDATETIME(type_num)) { | ||
is_datetimelike = 1; | ||
PyArray_VectorUnaryFunc *castfunc = | ||
PyArray_GetCastFunc(PyArray_DescrFromType(type_num), NPY_INT64); | ||
if (!castfunc) { | ||
PyErr_Format(PyExc_ValueError, | ||
"Cannot cast numpy dtype %d to long", | ||
enc->npyType); | ||
} | ||
castfunc(dataptr, &longVal, 1, NULL, NULL); | ||
if (enc->datetimeIso) { | ||
cLabel = int64ToIso(longVal, base, &len); | ||
castfunc(dataptr, &nanosecVal, 1, NULL, NULL); | ||
} else if (PyDate_Check(item) || PyDelta_Check(item)) { | ||
is_datetimelike = 1; | ||
if (PyObject_HasAttrString(item, "value")) { | ||
nanosecVal = get_long_attr(item, "value"); | ||
} else { | ||
if (!scaleNanosecToUnit(&longVal, base)) { | ||
// TODO: This gets hit but somehow doesn't cause errors | ||
// need to clean up (elsewhere in module as well) | ||
if (PyDelta_Check(item)) { | ||
nanosecVal = total_seconds(item) * | ||
1000000000LL; // nanoseconds per second | ||
} else { | ||
// datetime.* objects don't follow above rules | ||
nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns); | ||
} | ||
cLabel = PyObject_Malloc(21); // 21 chars for int64 | ||
sprintf(cLabel, "%" NPY_INT64_FMT, longVal); | ||
len = strlen(cLabel); | ||
} | ||
} else if (PyDateTime_Check(item) || PyDate_Check(item)) { | ||
NPY_DATETIMEUNIT base = enc->datetimeUnit; | ||
if (enc->datetimeIso) { | ||
cLabel = PyDateTimeToIso((PyDateTime_Date *)item, base, &len); | ||
} | ||
|
||
if (is_datetimelike) { | ||
// JSON requires a string for the index so write "null" | ||
// is there is a standard for this? | ||
if (nanosecVal == get_nat()) { | ||
len = 5; // TODO: shouldn't require extra space for terminator | ||
cLabel = PyObject_Malloc(len); | ||
strncpy(cLabel, "null", len); | ||
} else { | ||
cLabel = PyObject_Malloc(21); // 21 chars for int64 | ||
sprintf(cLabel, "%" NPY_DATETIME_FMT, | ||
PyDateTimeToEpoch(item, base)); | ||
len = strlen(cLabel); | ||
if (enc->datetimeIso) { | ||
if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { | ||
cLabel = int64ToIsoDuration(nanosecVal, &len); | ||
} else { | ||
if (type_num == NPY_DATETIME) { | ||
cLabel = int64ToIso(nanosecVal, base, &len); | ||
} else { | ||
cLabel = PyDateTimeToIso((PyDateTime_Date *)item, | ||
base, &len); | ||
} | ||
} | ||
if (cLabel == NULL) { | ||
Py_DECREF(item); | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
break; | ||
} | ||
} else { | ||
cLabel = PyObject_Malloc(21); // 21 chars for int64 | ||
sprintf(cLabel, "%" NPY_DATETIME_FMT, | ||
NpyDateTimeToEpoch(nanosecVal, base)); | ||
len = strlen(cLabel); | ||
} | ||
} | ||
} else { // Fallback to string representation | ||
PyObject *str = PyObject_Str(item); | ||
|
@@ -1615,6 +1649,11 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, | |
ret[i] = PyObject_Malloc(len + 1); | ||
memcpy(ret[i], cLabel, len + 1); | ||
|
||
if (is_datetimelike) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This fixes a memory leak in the current implementation where any datetimelike values used in the index would leak their respective character representation out; will add a whatsnew once the file is created |
||
// these were created with PyObject_Malloc so free accordingly | ||
PyObject_Free(cLabel); | ||
} | ||
|
||
if (PyErr_Occurred()) { | ||
NpyArr_freeLabels(ret, num); | ||
ret = 0; | ||
|
@@ -1703,7 +1742,11 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { | |
|
||
if (enc->datetimeIso) { | ||
PRINTMARK(); | ||
pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; | ||
if (enc->npyType == NPY_TIMEDELTA) { | ||
pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; | ||
} else { | ||
pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; | ||
} | ||
// Currently no way to pass longVal to iso function, so use | ||
// state management | ||
GET_TC(tc)->longValue = longVal; | ||
|
@@ -1823,28 +1866,31 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { | |
value = total_seconds(obj) * 1000000000LL; // nanoseconds per second | ||
} | ||
|
||
unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; | ||
if (scaleNanosecToUnit(&value, unit) != 0) { | ||
// TODO: Add some kind of error handling here | ||
} | ||
|
||
exc = PyErr_Occurred(); | ||
|
||
if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { | ||
PRINTMARK(); | ||
goto INVALID; | ||
} | ||
GET_TC(tc)->longValue = value; | ||
|
||
PRINTMARK(); | ||
if (value == get_nat()) { | ||
PRINTMARK(); | ||
tc->type = JT_NULL; | ||
return; | ||
} | ||
} else if (enc->datetimeIso) { | ||
pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; | ||
tc->type = JT_UTF8; | ||
} else { | ||
unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; | ||
if (scaleNanosecToUnit(&(GET_TC(tc)->longValue), unit) != 0) { | ||
// TODO: Add some kind of error handling here | ||
} | ||
|
||
GET_TC(tc)->longValue = value; | ||
exc = PyErr_Occurred(); | ||
|
||
PRINTMARK(); | ||
tc->type = JT_LONG; | ||
if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { | ||
PRINTMARK(); | ||
goto INVALID; | ||
} | ||
|
||
tc->type = JT_LONG; | ||
} | ||
return; | ||
} else if (PyArray_IsScalar(obj, Integer)) { | ||
PRINTMARK(); | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think after this PR going to split these conversion functions off into a separate module as the JSON one is getting rather big
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The other PR is #31057 should probably be merged first