From 60db2115b6cc61a4a174642c25ab4da73837ae4f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 22 May 2023 16:23:51 -0700 Subject: [PATCH 1/4] Working refactor with setuptools --- .pre-commit-config.yaml | 6 +- MANIFEST.in | 5 +- .../pandas}/datetime/date_conversions.h | 0 .../pandas}/datetime/pd_datetime.h | 6 +- .../{src => include/pandas}/inline_helper.h | 0 .../_libs/{src => include/pandas}/parser/io.h | 0 .../{ => include/pandas/parser}/pd_parser.h | 2 +- .../pandas}/parser/tokenizer.h | 6 +- .../headers => include/pandas}/portable.h | 11 +- .../_libs/{src => include/pandas}/skiplist.h | 2 +- .../pandas/vendored}/klib/khash.h | 2 +- .../pandas/vendored}/klib/khash_python.h | 0 .../vendored/numpy}/datetime/np_datetime.h | 0 .../numpy}/datetime/np_datetime_strings.h | 0 .../pandas/vendored}/ujson/lib/ultrajson.h | 2 +- .../pandas/vendored}/ujson/python/version.h | 0 pandas/_libs/khash.pxd | 2 +- .../_libs/khash_for_primitive_helper.pxi.in | 2 +- pandas/_libs/lib.pyx | 2 +- pandas/_libs/parsers.pyx | 6 +- pandas/_libs/pd_parser.c | 178 -- pandas/_libs/src/parser/io.c | 2 +- pandas/_libs/src/parser/tokenizer.c | 4 +- pandas/_libs/src/ujson/lib/ultrajsondec.c | 1208 ---------- pandas/_libs/src/ujson/lib/ultrajsonenc.c | 1207 ---------- pandas/_libs/src/ujson/python/JSONtoObj.c | 520 ---- pandas/_libs/src/ujson/python/objToJSON.c | 2135 ----------------- pandas/_libs/src/ujson/python/ujson.c | 451 ---- pandas/_libs/tslibs/np_datetime.pxd | 2 +- pandas/_libs/tslibs/np_datetime.pyx | 2 +- pandas/_libs/tslibs/parsing.pyx | 4 +- .../tslibs/src/datetime/date_conversions.c | 100 - .../_libs/tslibs/src/datetime/np_datetime.c | 947 -------- .../tslibs/src/datetime/np_datetime_strings.c | 1150 --------- .../_libs/tslibs/src/datetime/pd_datetime.c | 253 -- pandas/_libs/window/aggregations.pyx | 2 +- setup.py | 87 +- 37 files changed, 71 insertions(+), 8235 deletions(-) rename pandas/_libs/{tslibs/src => include/pandas}/datetime/date_conversions.h (100%) rename pandas/_libs/{tslibs/src => include/pandas}/datetime/pd_datetime.h (97%) rename pandas/_libs/{src => include/pandas}/inline_helper.h (100%) rename pandas/_libs/{src => include/pandas}/parser/io.h (100%) rename pandas/_libs/{ => include/pandas/parser}/pd_parser.h (99%) rename pandas/_libs/{src => include/pandas}/parser/tokenizer.h (98%) rename pandas/_libs/{src/headers => include/pandas}/portable.h (67%) rename pandas/_libs/{src => include/pandas}/skiplist.h (99%) rename pandas/_libs/{src => include/pandas/vendored}/klib/khash.h (99%) rename pandas/_libs/{src => include/pandas/vendored}/klib/khash_python.h (100%) rename pandas/_libs/{tslibs/src => include/pandas/vendored/numpy}/datetime/np_datetime.h (100%) rename pandas/_libs/{tslibs/src => include/pandas/vendored/numpy}/datetime/np_datetime_strings.h (100%) rename pandas/_libs/{src => include/pandas/vendored}/ujson/lib/ultrajson.h (99%) rename pandas/_libs/{src => include/pandas/vendored}/ujson/python/version.h (100%) delete mode 100644 pandas/_libs/pd_parser.c delete mode 100644 pandas/_libs/src/ujson/lib/ultrajsondec.c delete mode 100644 pandas/_libs/src/ujson/lib/ultrajsonenc.c delete mode 100644 pandas/_libs/src/ujson/python/JSONtoObj.c delete mode 100644 pandas/_libs/src/ujson/python/objToJSON.c delete mode 100644 pandas/_libs/src/ujson/python/ujson.c delete mode 100644 pandas/_libs/tslibs/src/datetime/date_conversions.c delete mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime.c delete mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime_strings.c delete mode 100644 
pandas/_libs/tslibs/src/datetime/pd_datetime.c diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e3ebdb859319a..de4c6687fbc0f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,11 +65,7 @@ repos: rev: 1.6.1 hooks: - id: cpplint - # We don't lint all C files because we don't want to lint any that are built - # from Cython files nor do we want to lint C files that we didn't modify for - # this particular codebase (e.g. src/headers, src/klib). However, - # we can lint all header files since they aren't "generated" like C files are. - exclude: ^pandas/_libs/src/(klib|headers)/ + exclude: ^pandas/_libs/include/pandas/vendored/klib args: [ --quiet, '--extensions=c,h', diff --git a/MANIFEST.in b/MANIFEST.in index 361cd8ff9ec22..0846cc3690c47 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -56,7 +56,4 @@ global-exclude *.pxi prune pandas/tests/io/parser/data # Selectively re-add *.cxx files that were excluded above -graft pandas/_libs/src -graft pandas/_libs/tslibs/src -include pandas/_libs/pd_parser.h -include pandas/_libs/pd_parser.c +graft pandas/_libs/include diff --git a/pandas/_libs/tslibs/src/datetime/date_conversions.h b/pandas/_libs/include/pandas/datetime/date_conversions.h similarity index 100% rename from pandas/_libs/tslibs/src/datetime/date_conversions.h rename to pandas/_libs/include/pandas/datetime/date_conversions.h diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/include/pandas/datetime/pd_datetime.h similarity index 97% rename from pandas/_libs/tslibs/src/datetime/pd_datetime.h rename to pandas/_libs/include/pandas/datetime/pd_datetime.h index 4e3baf4b47ed0..55aa046cf076b 100644 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.h +++ b/pandas/_libs/include/pandas/datetime/pd_datetime.h @@ -22,9 +22,9 @@ See NUMPY_LICENSE.txt for the license. 
#endif // NPY_NO_DEPRECATED_API #include -#include "np_datetime.h" -#include "np_datetime_strings.h" -#include "date_conversions.h" +#include "pandas/vendored/numpy/datetime/np_datetime.h" +#include "pandas/vendored/numpy/datetime/np_datetime_strings.h" +#include "pandas/datetime/date_conversions.h" #ifdef __cplusplus extern "C" { diff --git a/pandas/_libs/src/inline_helper.h b/pandas/_libs/include/pandas/inline_helper.h similarity index 100% rename from pandas/_libs/src/inline_helper.h rename to pandas/_libs/include/pandas/inline_helper.h diff --git a/pandas/_libs/src/parser/io.h b/pandas/_libs/include/pandas/parser/io.h similarity index 100% rename from pandas/_libs/src/parser/io.h rename to pandas/_libs/include/pandas/parser/io.h diff --git a/pandas/_libs/pd_parser.h b/pandas/_libs/include/pandas/parser/pd_parser.h similarity index 99% rename from pandas/_libs/pd_parser.h rename to pandas/_libs/include/pandas/parser/pd_parser.h index 72254090c0056..1ea94fde593ef 100644 --- a/pandas/_libs/pd_parser.h +++ b/pandas/_libs/include/pandas/parser/pd_parser.h @@ -14,7 +14,7 @@ extern "C" { #define PY_SSIZE_T_CLEAN #include -#include "src/parser/tokenizer.h" +#include "pandas/parser/tokenizer.h" typedef struct { int (*to_double)(char *, double *, char, char, int *); diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/include/pandas/parser/tokenizer.h similarity index 98% rename from pandas/_libs/src/parser/tokenizer.h rename to pandas/_libs/include/pandas/parser/tokenizer.h index 7e8c3d102ac63..a53d09012116d 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/include/pandas/parser/tokenizer.h @@ -19,10 +19,10 @@ See LICENSE for the license #define ERROR_INVALID_CHARS 3 #include -#include "../inline_helper.h" -#include "../headers/portable.h" +#include "pandas/inline_helper.h" +#include "pandas/portable.h" -#include "khash.h" +#include "pandas/vendored/klib/khash.h" #define STREAM_INIT_SIZE 32 diff --git a/pandas/_libs/src/headers/portable.h b/pandas/_libs/include/pandas/portable.h similarity index 67% rename from pandas/_libs/src/headers/portable.h rename to pandas/_libs/include/pandas/portable.h index a34f833b7fd6b..954b5c3cce082 100644 --- a/pandas/_libs/src/headers/portable.h +++ b/pandas/_libs/include/pandas/portable.h @@ -1,9 +1,18 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+*/ + #pragma once #include #if defined(_MSC_VER) -#define strcasecmp( s1, s2 ) _stricmp( s1, s2 ) +#define strcasecmp(s1, s2) _stricmp(s1, s2) #endif // GH-23516 - works around locale perf issues diff --git a/pandas/_libs/src/skiplist.h b/pandas/_libs/include/pandas/skiplist.h similarity index 99% rename from pandas/_libs/src/skiplist.h rename to pandas/_libs/include/pandas/skiplist.h index d94099da5890e..3be9e51f42e09 100644 --- a/pandas/_libs/src/skiplist.h +++ b/pandas/_libs/include/pandas/skiplist.h @@ -19,7 +19,7 @@ Python recipe (https://rhettinger.wordpress.com/2010/02/06/lost-knowledge/) #include #include #include -#include "inline_helper.h" +#include "pandas/inline_helper.h" PANDAS_INLINE float __skiplist_nanf(void) { const union { diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/include/pandas/vendored/klib/khash.h similarity index 99% rename from pandas/_libs/src/klib/khash.h rename to pandas/_libs/include/pandas/vendored/klib/khash.h index e17d82d51f0fb..95b25d053a9df 100644 --- a/pandas/_libs/src/klib/khash.h +++ b/pandas/_libs/include/pandas/vendored/klib/khash.h @@ -112,7 +112,7 @@ int main() { #include #include #include -#include "../inline_helper.h" +#include "pandas/inline_helper.h" // hooks for memory allocator, C-runtime allocator used per default diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h similarity index 100% rename from pandas/_libs/src/klib/khash_python.h rename to pandas/_libs/include/pandas/vendored/klib/khash_python.h diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime.h similarity index 100% rename from pandas/_libs/tslibs/src/datetime/np_datetime.h rename to pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime.h diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime_strings.h similarity index 100% rename from pandas/_libs/tslibs/src/datetime/np_datetime_strings.h rename to pandas/_libs/include/pandas/vendored/numpy/datetime/np_datetime_strings.h diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h similarity index 99% rename from pandas/_libs/src/ujson/lib/ultrajson.h rename to pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h index d359cf27ff7e2..54bcca9e4136c 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h @@ -53,7 +53,7 @@ tree doesn't have cyclic references. 
#include #include -#include "../../headers/portable.h" +#include "pandas/portable.h" // Don't output any extra whitespaces when encoding #define JSON_NO_EXTRA_WHITESPACE diff --git a/pandas/_libs/src/ujson/python/version.h b/pandas/_libs/include/pandas/vendored/ujson/python/version.h similarity index 100% rename from pandas/_libs/src/ujson/python/version.h rename to pandas/_libs/include/pandas/vendored/ujson/python/version.h diff --git a/pandas/_libs/khash.pxd b/pandas/_libs/khash.pxd index a9f819e5e16db..c439e1cca772b 100644 --- a/pandas/_libs/khash.pxd +++ b/pandas/_libs/khash.pxd @@ -15,7 +15,7 @@ from numpy cimport ( ) -cdef extern from "khash_python.h": +cdef extern from "pandas/vendored/klib/khash_python.h": const int KHASH_TRACE_DOMAIN ctypedef uint32_t khuint_t diff --git a/pandas/_libs/khash_for_primitive_helper.pxi.in b/pandas/_libs/khash_for_primitive_helper.pxi.in index d0934b3e0ee6e..d3391d4028938 100644 --- a/pandas/_libs/khash_for_primitive_helper.pxi.in +++ b/pandas/_libs/khash_for_primitive_helper.pxi.in @@ -24,7 +24,7 @@ primitive_types = [('int64', 'int64_t'), {{for name, c_type in primitive_types}} -cdef extern from "khash_python.h": +cdef extern from "pandas/vendored/klib/khash_python.h": ctypedef struct kh_{{name}}_t: khuint_t n_buckets, size, n_occupied, upper_bound uint32_t *flags diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index bc2886e5b531c..27e5a29e4aeca 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -93,7 +93,7 @@ cdef extern from "numpy/arrayobject.h": cdef extern from "numpy/ndarrayobject.h": bint PyArray_CheckScalar(obj) nogil -cdef extern from "pd_parser.h": +cdef extern from "pandas/parser/pd_parser.h": int floatify(object, float64_t *result, int *maybe_int) except -1 void PandasParser_IMPORT() diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index a45299c8ba896..10b2427835ed1 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -117,7 +117,7 @@ cdef: int64_t DEFAULT_CHUNKSIZE = 256 * 1024 -cdef extern from "headers/portable.h": +cdef extern from "pandas/portable.h": # I *think* this is here so that strcasecmp is defined on Windows # so we don't get # `parsers.obj : error LNK2001: unresolved external symbol strcasecmp` @@ -127,7 +127,7 @@ cdef extern from "headers/portable.h": pass -cdef extern from "parser/tokenizer.h": +cdef extern from "pandas/parser/tokenizer.h": ctypedef enum ParserState: START_RECORD @@ -245,7 +245,7 @@ cdef extern from "parser/tokenizer.h": void COLITER_NEXT(coliter_t, const char *) nogil -cdef extern from "pd_parser.h": +cdef extern from "pandas/parser/pd_parser.h": void *new_rd_source(object obj) except NULL int del_rd_source(void *src) diff --git a/pandas/_libs/pd_parser.c b/pandas/_libs/pd_parser.c deleted file mode 100644 index 15d82b59df3e8..0000000000000 --- a/pandas/_libs/pd_parser.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - -Copyright (c) 2023, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. 
- -*/ -#define _PANDAS_PARSER_IMPL - -#include "pd_parser.h" -#include "src/parser/io.h" - -static int to_double(char *item, double *p_value, char sci, char decimal, - int *maybe_int) { - char *p_end = NULL; - int error = 0; - - /* Switch to precise xstrtod GH 31364 */ - *p_value = - precise_xstrtod(item, &p_end, decimal, sci, '\0', 1, &error, maybe_int); - - return (error == 0) && (!*p_end); -} - -static int floatify(PyObject *str, double *result, int *maybe_int) { - int status; - char *data; - PyObject *tmp = NULL; - const char sci = 'E'; - const char dec = '.'; - - if (PyBytes_Check(str)) { - data = PyBytes_AS_STRING(str); - } else if (PyUnicode_Check(str)) { - tmp = PyUnicode_AsUTF8String(str); - if (tmp == NULL) { - return -1; - } - data = PyBytes_AS_STRING(tmp); - } else { - PyErr_SetString(PyExc_TypeError, "Invalid object type"); - return -1; - } - - status = to_double(data, result, sci, dec, maybe_int); - - if (!status) { - /* handle inf/-inf infinity/-infinity */ - if (strlen(data) == 3) { - if (0 == strcasecmp(data, "inf")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else if (strlen(data) == 4) { - if (0 == strcasecmp(data, "-inf")) { - *result = -HUGE_VAL; - *maybe_int = 0; - } else if (0 == strcasecmp(data, "+inf")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else if (strlen(data) == 8) { - if (0 == strcasecmp(data, "infinity")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else if (strlen(data) == 9) { - if (0 == strcasecmp(data, "-infinity")) { - *result = -HUGE_VAL; - *maybe_int = 0; - } else if (0 == strcasecmp(data, "+infinity")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else { - goto parsingerror; - } - } - - Py_XDECREF(tmp); - return 0; - -parsingerror: - PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data); - Py_XDECREF(tmp); - return -1; -} - - -static void pandas_parser_destructor(PyObject *op) { - void *ptr = PyCapsule_GetPointer(op, PandasParser_CAPSULE_NAME); - PyMem_Free(ptr); -} - -static int pandas_parser_exec(PyObject *module) { - PandasParser_CAPI *capi = PyMem_Malloc(sizeof(PandasParser_CAPI)); - if (capi == NULL) { - PyErr_NoMemory(); - return -1; - } - - capi->to_double = to_double; - capi->floatify = floatify; - capi->new_rd_source = new_rd_source; - capi->del_rd_source = del_rd_source; - capi->buffer_rd_bytes = buffer_rd_bytes; - capi->uint_state_init = uint_state_init; - capi->uint64_conflict = uint64_conflict; - capi->coliter_setup = coliter_setup; - capi->parser_new = parser_new; - capi->parser_init = parser_init; - capi->parser_free = parser_free; - capi->parser_del = parser_del; - capi->parser_add_skiprow = parser_add_skiprow; - capi->parser_set_skipfirstnrows = parser_set_skipfirstnrows; - capi->parser_set_default_options = parser_set_default_options; - capi->parser_consume_rows = parser_consume_rows; - capi->parser_trim_buffers = parser_trim_buffers; - capi->tokenize_all_rows = tokenize_all_rows; - capi->tokenize_nrows = tokenize_nrows; - capi->str_to_int64 = str_to_int64; - capi->str_to_uint64 = str_to_uint64; - capi->xstrtod = xstrtod; - capi->precise_xstrtod = precise_xstrtod; - capi->round_trip = round_trip; - capi->to_boolean = to_boolean; - - PyObject *capsule = - PyCapsule_New(capi, PandasParser_CAPSULE_NAME, pandas_parser_destructor); - if (capsule == NULL) { - PyMem_Free(capi); - return -1; - } - - // Monkeypatch the top level pandas module to have an attribute for the - // 
C-API. This is required because Python capsules do not support setting - // this attribute on anything but the top level package. Ideally not - // done when cpython gh-6898 gets implemented - PyObject *pandas = PyImport_ImportModule("pandas"); - if (!pandas) { - PyErr_SetString(PyExc_ImportError, - "pd_parser.c could not import module pandas"); - Py_DECREF(capsule); - return -1; - } - - if (PyModule_AddObject(pandas, "_pandas_parser_CAPI", capsule) < 0) { - Py_DECREF(capsule); - return -1; - } - - return 0; -} - -static PyModuleDef_Slot pandas_parser_slots[] = { - {Py_mod_exec, pandas_parser_exec}, {0, NULL}}; - -static struct PyModuleDef pandas_parsermodule = { - PyModuleDef_HEAD_INIT, - .m_name = "pandas._libs.pandas_parser", - - .m_doc = "Internal module with parser support for other extensions", - .m_size = 0, - .m_methods = NULL, - .m_slots = pandas_parser_slots}; - -PyMODINIT_FUNC PyInit_pandas_parser(void) { - return PyModuleDef_Init(&pandas_parsermodule); -} diff --git a/pandas/_libs/src/parser/io.c b/pandas/_libs/src/parser/io.c index 38304cca94a12..e00c5c1e807a7 100644 --- a/pandas/_libs/src/parser/io.c +++ b/pandas/_libs/src/parser/io.c @@ -7,7 +7,7 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. */ -#include "io.h" +#include "pandas/parser/io.h" /* On-disk FILE, uncompressed diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index e60fc6bf75f91..abd3fb9e1fef3 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -17,13 +17,13 @@ GitHub. See Python Software Foundation License and BSD licenses for these. */ -#include "tokenizer.h" +#include "pandas/parser/tokenizer.h" #include #include #include -#include "../headers/portable.h" +#include "pandas/portable.h" void coliter_setup(coliter_t *self, parser_t *parser, int64_t i, int64_t start) { diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c deleted file mode 100644 index 5347db1655669..0000000000000 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ /dev/null @@ -1,1208 +0,0 @@ -/* -Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -* Neither the name of the ESN Social Software AB nor the -names of its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE -LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) -https://github.com/client9/stringencoders -Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights -reserved. - -Numeric decoder derived from TCL library -https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms -* Copyright (c) 1988-1993 The Regents of the University of California. -* Copyright (c) 1994 Sun Microsystems, Inc. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "ultrajson.h" - -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif -#ifndef NULL -#define NULL 0 -#endif - -struct DecoderState { - char *start; - char *end; - wchar_t *escStart; - wchar_t *escEnd; - int escHeap; - int lastType; - JSUINT32 objDepth; - void *prv; - JSONObjectDecoder *dec; -}; - -JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds); -typedef JSOBJ (*PFN_DECODER)(struct DecoderState *ds); - -static JSOBJ SetError(struct DecoderState *ds, int offset, - const char *message) { - ds->dec->errorOffset = ds->start + offset; - ds->dec->errorStr = (char *)message; - return NULL; -} - -double createDouble(double intNeg, double intValue, double frcValue, - int frcDecimalCount) { - static const double g_pow10[] = {1.0, - 0.1, - 0.01, - 0.001, - 0.0001, - 0.00001, - 0.000001, - 0.0000001, - 0.00000001, - 0.000000001, - 0.0000000001, - 0.00000000001, - 0.000000000001, - 0.0000000000001, - 0.00000000000001, - 0.000000000000001}; - return (intValue + (frcValue * g_pow10[frcDecimalCount])) * intNeg; -} - -JSOBJ FASTCALL_MSVC decodePreciseFloat(struct DecoderState *ds) { - char *end; - double value; - errno = 0; - - value = strtod(ds->start, &end); - - if (errno == ERANGE) { - return SetError(ds, -1, "Range error when decoding numeric as double"); - } - - ds->start = end; - return ds->dec->newDouble(ds->prv, value); -} - -JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { - int intNeg = 1; - JSUINT64 intValue; - JSUINT64 prevIntValue; - int chr; - int decimalCount = 0; - double frcValue = 0.0; - double expNeg; - double expValue; - char *offset = ds->start; - - JSUINT64 overflowLimit = LLONG_MAX; - - if (*(offset) == 'I') { - goto DECODE_INF; - } else if (*(offset) == 'N') { - goto DECODE_NAN; - } else if (*(offset) == '-') { - offset++; - intNeg = -1; - overflowLimit = LLONG_MIN; - if (*(offset) == 'I') { - goto DECODE_INF; - } - } - - // Scan integer part - intValue = 0; - - while (1) { - chr = (int)(unsigned char)*(offset); - - switch (chr) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - // PERF: Don't do 64-bit arithmetic here unless we have to - prevIntValue = intValue; - intValue = intValue * 10ULL + (JSLONG) (chr - 48); - - if (intNeg == 1 && prevIntValue > intValue) { - return SetError(ds, -1, "Value is too big!"); - } else if (intNeg == -1 && intValue > overflowLimit) { - return SetError(ds, -1, overflowLimit == LLONG_MAX ? 
- "Value is too big!" : "Value is too small"); - } - - offset++; - break; - } - case '.': { - offset++; - goto DECODE_FRACTION; - break; - } - case 'e': - case 'E': { - offset++; - goto DECODE_EXPONENT; - break; - } - - default: { - goto BREAK_INT_LOOP; - break; - } - } - } - -BREAK_INT_LOOP: - - ds->lastType = JT_INT; - ds->start = offset; - - if (intNeg == 1 && (intValue & 0x8000000000000000ULL) != 0) - return ds->dec->newUnsignedLong(ds->prv, intValue); - else if ((intValue >> 31)) - return ds->dec->newLong(ds->prv, (JSINT64)(intValue * (JSINT64)intNeg)); - else - return ds->dec->newInt(ds->prv, (JSINT32)(intValue * intNeg)); - -DECODE_FRACTION: - - if (ds->dec->preciseFloat) { - return decodePreciseFloat(ds); - } - - // Scan fraction part - frcValue = 0.0; - for (;;) { - chr = (int)(unsigned char)*(offset); - - switch (chr) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - if (decimalCount < JSON_DOUBLE_MAX_DECIMALS) { - frcValue = frcValue * 10.0 + (double)(chr - 48); - decimalCount++; - } - offset++; - break; - } - case 'e': - case 'E': { - offset++; - goto DECODE_EXPONENT; - break; - } - default: { goto BREAK_FRC_LOOP; } - } - } - -BREAK_FRC_LOOP: - // FIXME: Check for arithmetic overflow here - ds->lastType = JT_DOUBLE; - ds->start = offset; - return ds->dec->newDouble( - ds->prv, - createDouble((double)intNeg, (double)intValue, frcValue, decimalCount)); - -DECODE_EXPONENT: - if (ds->dec->preciseFloat) { - return decodePreciseFloat(ds); - } - - expNeg = 1.0; - - if (*(offset) == '-') { - expNeg = -1.0; - offset++; - } else if (*(offset) == '+') { - expNeg = +1.0; - offset++; - } - - expValue = 0.0; - - for (;;) { - chr = (int)(unsigned char)*(offset); - - switch (chr) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': { - expValue = expValue * 10.0 + (double)(chr - 48); - offset++; - break; - } - default: { goto BREAK_EXP_LOOP; } - } - } - -DECODE_NAN: - offset++; - if (*(offset++) != 'a') goto SET_NAN_ERROR; - if (*(offset++) != 'N') goto SET_NAN_ERROR; - - ds->lastType = JT_NULL; - ds->start = offset; - return ds->dec->newNull(ds->prv); - -SET_NAN_ERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'NaN'"); - -DECODE_INF: - offset++; - if (*(offset++) != 'n') goto SET_INF_ERROR; - if (*(offset++) != 'f') goto SET_INF_ERROR; - if (*(offset++) != 'i') goto SET_INF_ERROR; - if (*(offset++) != 'n') goto SET_INF_ERROR; - if (*(offset++) != 'i') goto SET_INF_ERROR; - if (*(offset++) != 't') goto SET_INF_ERROR; - if (*(offset++) != 'y') goto SET_INF_ERROR; - - ds->start = offset; - - if (intNeg == 1) { - ds->lastType = JT_POS_INF; - return ds->dec->newPosInf(ds->prv); - } else { - ds->lastType = JT_NEG_INF; - return ds->dec->newNegInf(ds->prv); - } - -SET_INF_ERROR: - if (intNeg == 1) { - const char *msg = "Unexpected character found when decoding 'Infinity'"; - return SetError(ds, -1, msg); - } else { - const char *msg = "Unexpected character found when decoding '-Infinity'"; - return SetError(ds, -1, msg); - } - - -BREAK_EXP_LOOP: - // FIXME: Check for arithmetic overflow here - ds->lastType = JT_DOUBLE; - ds->start = offset; - return ds->dec->newDouble( - ds->prv, - createDouble((double)intNeg, (double)intValue, frcValue, decimalCount) * - pow(10.0, expValue * expNeg)); -} - -JSOBJ FASTCALL_MSVC decode_true(struct DecoderState *ds) { - char *offset = ds->start; - offset++; - - if (*(offset++) != 'r') goto SETERROR; - 
if (*(offset++) != 'u') goto SETERROR; - if (*(offset++) != 'e') goto SETERROR; - - ds->lastType = JT_TRUE; - ds->start = offset; - return ds->dec->newTrue(ds->prv); - -SETERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'true'"); -} - -JSOBJ FASTCALL_MSVC decode_false(struct DecoderState *ds) { - char *offset = ds->start; - offset++; - - if (*(offset++) != 'a') goto SETERROR; - if (*(offset++) != 'l') goto SETERROR; - if (*(offset++) != 's') goto SETERROR; - if (*(offset++) != 'e') goto SETERROR; - - ds->lastType = JT_FALSE; - ds->start = offset; - return ds->dec->newFalse(ds->prv); - -SETERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'false'"); -} - -JSOBJ FASTCALL_MSVC decode_null(struct DecoderState *ds) { - char *offset = ds->start; - offset++; - - if (*(offset++) != 'u') goto SETERROR; - if (*(offset++) != 'l') goto SETERROR; - if (*(offset++) != 'l') goto SETERROR; - - ds->lastType = JT_NULL; - ds->start = offset; - return ds->dec->newNull(ds->prv); - -SETERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'null'"); -} - -void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds) { - char *offset; - - for (offset = ds->start; (ds->end - offset) > 0; offset++) { - switch (*offset) { - case ' ': - case '\t': - case '\r': - case '\n': - break; - - default: - ds->start = offset; - return; - } - } - - if (offset == ds->end) { - ds->start = ds->end; - } -} - -enum DECODESTRINGSTATE { - DS_ISNULL = 0x32, - DS_ISQUOTE, - DS_ISESCAPE, - DS_UTFLENERROR, -}; - -static const JSUINT8 g_decoderLookup[256] = { - /* 0x00 */ DS_ISNULL, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x10 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x20 */ 1, - 1, - DS_ISQUOTE, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x30 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x40 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x50 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - DS_ISESCAPE, - 1, - 1, - 1, - /* 0x60 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x70 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x80 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x90 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0xa0 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0xb0 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0xc0 */ 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - /* 0xd0 */ 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - /* 0xe0 */ 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - /* 0xf0 */ 4, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, - DS_UTFLENERROR, -}; - -JSOBJ FASTCALL_MSVC decode_string(struct DecoderState *ds) { - JSUTF16 sur[2] = {0}; - int iSur = 0; - int index; - wchar_t *escOffset; - wchar_t *escStart; - size_t escLen = (ds->escEnd - ds->escStart); - JSUINT8 *inputOffset; - JSUINT8 oct; - JSUTF32 ucs; - ds->lastType = JT_INVALID; - ds->start++; - - if ((size_t)(ds->end - ds->start) > escLen) { - 
size_t newSize = (ds->end - ds->start); - - if (ds->escHeap) { - if (newSize > (SIZE_MAX / sizeof(wchar_t))) { - return SetError(ds, -1, "Could not reserve memory block"); - } - escStart = (wchar_t *)ds->dec->realloc(ds->escStart, - newSize * sizeof(wchar_t)); - if (!escStart) { - ds->dec->free(ds->escStart); - return SetError(ds, -1, "Could not reserve memory block"); - } - ds->escStart = escStart; - } else { - wchar_t *oldStart = ds->escStart; - if (newSize > (SIZE_MAX / sizeof(wchar_t))) { - return SetError(ds, -1, "Could not reserve memory block"); - } - ds->escStart = - (wchar_t *)ds->dec->malloc(newSize * sizeof(wchar_t)); - if (!ds->escStart) { - return SetError(ds, -1, "Could not reserve memory block"); - } - ds->escHeap = 1; - memcpy(ds->escStart, oldStart, escLen * sizeof(wchar_t)); - } - - ds->escEnd = ds->escStart + newSize; - } - - escOffset = ds->escStart; - inputOffset = (JSUINT8 *)ds->start; - - for (;;) { - switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) { - case DS_ISNULL: { - return SetError(ds, -1, - "Unmatched ''\"' when when decoding 'string'"); - } - case DS_ISQUOTE: { - ds->lastType = JT_UTF8; - inputOffset++; - ds->start += ((char *)inputOffset - (ds->start)); - return ds->dec->newString(ds->prv, ds->escStart, escOffset); - } - case DS_UTFLENERROR: { - return SetError( - ds, -1, - "Invalid UTF-8 sequence length when decoding 'string'"); - } - case DS_ISESCAPE: - inputOffset++; - switch (*inputOffset) { - case '\\': - *(escOffset++) = L'\\'; - inputOffset++; - continue; - case '\"': - *(escOffset++) = L'\"'; - inputOffset++; - continue; - case '/': - *(escOffset++) = L'/'; - inputOffset++; - continue; - case 'b': - *(escOffset++) = L'\b'; - inputOffset++; - continue; - case 'f': - *(escOffset++) = L'\f'; - inputOffset++; - continue; - case 'n': - *(escOffset++) = L'\n'; - inputOffset++; - continue; - case 'r': - *(escOffset++) = L'\r'; - inputOffset++; - continue; - case 't': - *(escOffset++) = L'\t'; - inputOffset++; - continue; - - case 'u': { - int index; - inputOffset++; - - for (index = 0; index < 4; index++) { - switch (*inputOffset) { - case '\0': - return SetError(ds, -1, - "Unterminated unicode " - "escape sequence when " - "decoding 'string'"); - default: - return SetError(ds, -1, - "Unexpected character in " - "unicode escape sequence " - "when decoding 'string'"); - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - sur[iSur] = (sur[iSur] << 4) + - (JSUTF16)(*inputOffset - '0'); - break; - - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - sur[iSur] = (sur[iSur] << 4) + 10 + - (JSUTF16)(*inputOffset - 'a'); - break; - - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - sur[iSur] = (sur[iSur] << 4) + 10 + - (JSUTF16)(*inputOffset - 'A'); - break; - } - - inputOffset++; - } - - if (iSur == 0) { - if ((sur[iSur] & 0xfc00) == 0xd800) { - // First of a surrogate pair, continue parsing - iSur++; - break; - } - (*escOffset++) = (wchar_t)sur[iSur]; - iSur = 0; - } else { - // Decode pair - if ((sur[1] & 0xfc00) != 0xdc00) { - return SetError(ds, -1, - "Unpaired high surrogate when " - "decoding 'string'"); - } -#if WCHAR_MAX == 0xffff - (*escOffset++) = (wchar_t)sur[0]; - (*escOffset++) = (wchar_t)sur[1]; -#else - (*escOffset++) = - (wchar_t)0x10000 + - (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); -#endif - iSur = 0; - } - break; - } - - case '\0': - return SetError(ds, -1, - "Unterminated escape sequence when " - "decoding 'string'"); - 
default: - return SetError(ds, -1, - "Unrecognized escape sequence when " - "decoding 'string'"); - } - break; - - case 1: { - *(escOffset++) = (wchar_t)(*inputOffset++); - break; - } - - case 2: { - ucs = (*inputOffset++) & 0x1f; - ucs <<= 6; - if (((*inputOffset) & 0x80) != 0x80) { - return SetError(ds, -1, - "Invalid octet in UTF-8 sequence when " - "decoding 'string'"); - } - ucs |= (*inputOffset++) & 0x3f; - if (ucs < 0x80) - return SetError(ds, -1, - "Overlong 2 byte UTF-8 sequence detected " - "when decoding 'string'"); - *(escOffset++) = (wchar_t)ucs; - break; - } - - case 3: { - JSUTF32 ucs = 0; - ucs |= (*inputOffset++) & 0x0f; - - for (index = 0; index < 2; index++) { - ucs <<= 6; - oct = (*inputOffset++); - - if ((oct & 0x80) != 0x80) { - return SetError(ds, -1, - "Invalid octet in UTF-8 sequence when " - "decoding 'string'"); - } - - ucs |= oct & 0x3f; - } - - if (ucs < 0x800) - return SetError(ds, -1, - "Overlong 3 byte UTF-8 sequence detected " - "when encoding string"); - *(escOffset++) = (wchar_t)ucs; - break; - } - - case 4: { - JSUTF32 ucs = 0; - ucs |= (*inputOffset++) & 0x07; - - for (index = 0; index < 3; index++) { - ucs <<= 6; - oct = (*inputOffset++); - - if ((oct & 0x80) != 0x80) { - return SetError(ds, -1, - "Invalid octet in UTF-8 sequence when " - "decoding 'string'"); - } - - ucs |= oct & 0x3f; - } - - if (ucs < 0x10000) - return SetError(ds, -1, - "Overlong 4 byte UTF-8 sequence detected " - "when decoding 'string'"); - -#if WCHAR_MAX == 0xffff - if (ucs >= 0x10000) { - ucs -= 0x10000; - *(escOffset++) = (wchar_t)(ucs >> 10) + 0xd800; - *(escOffset++) = (wchar_t)(ucs & 0x3ff) + 0xdc00; - } else { - *(escOffset++) = (wchar_t)ucs; - } -#else - *(escOffset++) = (wchar_t)ucs; -#endif - break; - } - } - } -} - -JSOBJ FASTCALL_MSVC decode_array(struct DecoderState *ds) { - JSOBJ itemValue; - JSOBJ newObj; - int len; - ds->objDepth++; - if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { - return SetError(ds, -1, "Reached object decoding depth limit"); - } - - newObj = ds->dec->newArray(ds->prv, ds->dec); - len = 0; - - ds->lastType = JT_INVALID; - ds->start++; - - for (;;) { - SkipWhitespace(ds); - - if ((*ds->start) == ']') { - ds->objDepth--; - if (len == 0) { - ds->start++; - return ds->dec->endArray(ds->prv, newObj); - } - - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return SetError( - ds, -1, - "Unexpected character found when decoding array value (1)"); - } - - itemValue = decode_any(ds); - - if (itemValue == NULL) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return NULL; - } - - if (!ds->dec->arrayAddItem(ds->prv, newObj, itemValue)) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return NULL; - } - - SkipWhitespace(ds); - - switch (*(ds->start++)) { - case ']': { - ds->objDepth--; - return ds->dec->endArray(ds->prv, newObj); - } - case ',': - break; - - default: - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return SetError( - ds, -1, - "Unexpected character found when decoding array value (2)"); - } - - len++; - } -} - -JSOBJ FASTCALL_MSVC decode_object(struct DecoderState *ds) { - JSOBJ itemName; - JSOBJ itemValue; - JSOBJ newObj; - - ds->objDepth++; - if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { - return SetError(ds, -1, "Reached object decoding depth limit"); - } - - newObj = ds->dec->newObject(ds->prv, ds->dec); - - ds->start++; - - for (;;) { - SkipWhitespace(ds); - - if ((*ds->start) == '}') { - ds->objDepth--; - ds->start++; - return ds->dec->endObject(ds->prv, newObj); - } - - ds->lastType = JT_INVALID; - itemName = 
decode_any(ds); - - if (itemName == NULL) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return NULL; - } - - if (ds->lastType != JT_UTF8) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - ds->dec->releaseObject(ds->prv, itemName, ds->dec); - return SetError( - ds, -1, - "Key name of object must be 'string' when decoding 'object'"); - } - - SkipWhitespace(ds); - - if (*(ds->start++) != ':') { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - ds->dec->releaseObject(ds->prv, itemName, ds->dec); - return SetError(ds, -1, "No ':' found when decoding object value"); - } - - SkipWhitespace(ds); - - itemValue = decode_any(ds); - - if (itemValue == NULL) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - ds->dec->releaseObject(ds->prv, itemName, ds->dec); - return NULL; - } - - if (!ds->dec->objectAddKey(ds->prv, newObj, itemName, itemValue)) { - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - ds->dec->releaseObject(ds->prv, itemName, ds->dec); - ds->dec->releaseObject(ds->prv, itemValue, ds->dec); - return NULL; - } - - SkipWhitespace(ds); - - switch (*(ds->start++)) { - case '}': { - ds->objDepth--; - return ds->dec->endObject(ds->prv, newObj); - } - case ',': - break; - - default: - ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return SetError( - ds, -1, - "Unexpected character found when decoding object value"); - } - } -} - -JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) { - for (;;) { - switch (*ds->start) { - case '\"': - return decode_string(ds); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case 'I': - case 'N': - case '-': - return decode_numeric(ds); - - case '[': - return decode_array(ds); - case '{': - return decode_object(ds); - case 't': - return decode_true(ds); - case 'f': - return decode_false(ds); - case 'n': - return decode_null(ds); - - case ' ': - case '\t': - case '\r': - case '\n': - // White space - ds->start++; - break; - - default: - return SetError(ds, -1, "Expected object or value"); - } - } -} - -JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, - size_t cbBuffer) { - /* - FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode - escaping doesn't run into the wall each time */ - char *locale; - struct DecoderState ds; - wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))]; - JSOBJ ret; - - ds.start = (char *)buffer; - ds.end = ds.start + cbBuffer; - - ds.escStart = escBuffer; - ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t)); - ds.escHeap = 0; - ds.prv = dec->prv; - ds.dec = dec; - ds.dec->errorStr = NULL; - ds.dec->errorOffset = NULL; - ds.objDepth = 0; - - ds.dec = dec; - - locale = setlocale(LC_NUMERIC, NULL); - if (!locale) { - return SetError(&ds, -1, "setlocale call failed"); - } - - if (strcmp(locale, "C")) { - size_t len = strlen(locale) + 1; - char *saved_locale = malloc(len); - if (saved_locale == NULL) { - return SetError(&ds, -1, "Could not reserve memory block"); - } - memcpy(saved_locale, locale, len); - setlocale(LC_NUMERIC, "C"); - ret = decode_any(&ds); - setlocale(LC_NUMERIC, saved_locale); - free(saved_locale); - } else { - ret = decode_any(&ds); - } - - if (ds.escHeap) { - dec->free(ds.escStart); - } - - SkipWhitespace(&ds); - - if (ds.start != ds.end && ret) { - dec->releaseObject(ds.prv, ret, ds.dec); - return SetError(&ds, -1, "Trailing data"); - } - - return ret; -} diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c 
b/pandas/_libs/src/ujson/lib/ultrajsonenc.c deleted file mode 100644 index 169c5b6889077..0000000000000 --- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c +++ /dev/null @@ -1,1207 +0,0 @@ -/* -Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the ESN Social Software AB nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE -LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) -https://github.com/client9/stringencoders -Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights -reserved. - -Numeric decoder derived from TCL library -https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms - * Copyright (c) 1988-1993 The Regents of the University of California. - * Copyright (c) 1994 Sun Microsystems, Inc. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include "ultrajson.h" - -#ifndef TRUE -#define TRUE 1 -#endif -#ifndef FALSE -#define FALSE 0 -#endif - -/* -Worst cases being: - -Control characters (ASCII < 32) -0x00 (1 byte) input => \u0000 output (6 bytes) -1 * 6 => 6 (6 bytes required) - -or UTF-16 surrogate pairs -4 bytes input in UTF-8 => \uXXXX\uYYYY (12 bytes). - -4 * 6 => 24 bytes (12 bytes required) - -The extra 2 bytes are for the quotes around the string - -*/ -#define RESERVE_STRING(_len) (2 + ((_len)*6)) - -static const double g_pow10[] = {1, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000, - 1000000000, - 10000000000, - 100000000000, - 1000000000000, - 10000000000000, - 100000000000000, - 1000000000000000}; -static const char g_hexChars[] = "0123456789abcdef"; -static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\\\/"; - -/* -FIXME: While this is fine dandy and working it's a magic value mess which -probably only the author understands. 
-Needs a cleanup and more documentation */ - -/* -Table for pure ascii output escaping all characters above 127 to \uXXXX */ -static const JSUINT8 g_asciiOutputTable[256] = { - /* 0x00 */ 0, - 30, - 30, - 30, - 30, - 30, - 30, - 30, - 10, - 12, - 14, - 30, - 16, - 18, - 30, - 30, - /* 0x10 */ 30, - 30, - 30, - 30, - 30, - 30, - 30, - 30, - 30, - 30, - 30, - 30, - 30, - 30, - 30, - 30, - /* 0x20 */ 1, - 1, - 20, - 1, - 1, - 1, - 29, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 24, - /* 0x30 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 29, - 1, - 29, - 1, - /* 0x40 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x50 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 22, - 1, - 1, - 1, - /* 0x60 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x70 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x80 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0x90 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0xa0 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0xb0 */ 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - /* 0xc0 */ 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - /* 0xd0 */ 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - /* 0xe0 */ 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - /* 0xf0 */ 4, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - 5, - 5, - 5, - 5, - 6, - 6, - 1, - 1}; - -static void SetError(JSOBJ obj, JSONObjectEncoder *enc, const char *message) { - enc->errorMsg = message; - enc->errorObj = obj; -} - -/* -FIXME: Keep track of how big these get across several encoder calls and try to -make an estimate -That way we won't run our head into the wall each call */ -void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded) { - size_t curSize = enc->end - enc->start; - size_t newSize = curSize * 2; - size_t offset = enc->offset - enc->start; - - while (newSize < curSize + cbNeeded) { - newSize *= 2; - } - - if (enc->heap) { - enc->start = (char *)enc->realloc(enc->start, newSize); - if (!enc->start) { - SetError(NULL, enc, "Could not reserve memory block"); - return; - } - } else { - char *oldStart = enc->start; - enc->heap = 1; - enc->start = (char *)enc->malloc(newSize); - if (!enc->start) { - SetError(NULL, enc, "Could not reserve memory block"); - return; - } - memcpy(enc->start, oldStart, offset); - } - enc->offset = enc->start + offset; - enc->end = enc->start + newSize; -} - -INLINE_PREFIX void FASTCALL_MSVC -Buffer_AppendShortHexUnchecked(char *outputOffset, unsigned short value) { - *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12]; - *(outputOffset++) = g_hexChars[(value & 0x0f00) >> 8]; - *(outputOffset++) = g_hexChars[(value & 0x00f0) >> 4]; - *(outputOffset++) = g_hexChars[(value & 0x000f) >> 0]; -} - -int Buffer_EscapeStringUnvalidated(JSONObjectEncoder *enc, const char *io, - const char *end) { - char *of = (char *)enc->offset; - - for (;;) { - switch (*io) { - case 0x00: { - if (io < end) { - *(of++) = '\\'; - *(of++) = 'u'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = '0'; - break; - } else { - enc->offset += (of - enc->offset); - return TRUE; - } - } - case '\"': - (*of++) = '\\'; - (*of++) = '\"'; - break; - case '\\': - (*of++) = '\\'; - (*of++) = '\\'; - break; - case '/': - 
(*of++) = '\\'; - (*of++) = '/'; - break; - case '\b': - (*of++) = '\\'; - (*of++) = 'b'; - break; - case '\f': - (*of++) = '\\'; - (*of++) = 'f'; - break; - case '\n': - (*of++) = '\\'; - (*of++) = 'n'; - break; - case '\r': - (*of++) = '\\'; - (*of++) = 'r'; - break; - case '\t': - (*of++) = '\\'; - (*of++) = 't'; - break; - - case 0x26: // '/' - case 0x3c: // '<' - case 0x3e: // '>' - { - if (enc->encodeHTMLChars) { - // Fall through to \u00XX case below. - } else { - // Same as default case below. - (*of++) = (*io); - break; - } - } - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x0b: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1a: - case 0x1b: - case 0x1c: - case 0x1d: - case 0x1e: - case 0x1f: { - *(of++) = '\\'; - *(of++) = 'u'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = g_hexChars[(unsigned char)(((*io) & 0xf0) >> 4)]; - *(of++) = g_hexChars[(unsigned char)((*io) & 0x0f)]; - break; - } - default: - (*of++) = (*io); - break; - } - io++; - } -} - -int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc, - const char *io, const char *end) { - JSUTF32 ucs; - char *of = (char *)enc->offset; - - for (;;) { - JSUINT8 utflen = g_asciiOutputTable[(unsigned char)*io]; - - switch (utflen) { - case 0: { - if (io < end) { - *(of++) = '\\'; - *(of++) = 'u'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = '0'; - io++; - continue; - } else { - enc->offset += (of - enc->offset); - return TRUE; - } - } - - case 1: { - *(of++) = (*io++); - continue; - } - - case 2: { - JSUTF32 in; - JSUTF16 in16; - - if (end - io < 1) { - enc->offset += (of - enc->offset); - SetError( - obj, enc, - "Unterminated UTF-8 sequence when encoding string"); - return FALSE; - } - - memcpy(&in16, io, sizeof(JSUTF16)); - in = (JSUTF32)in16; - -#ifdef __LITTLE_ENDIAN__ - ucs = ((in & 0x1f) << 6) | ((in >> 8) & 0x3f); -#else - ucs = ((in & 0x1f00) >> 2) | (in & 0x3f); -#endif - - if (ucs < 0x80) { - enc->offset += (of - enc->offset); - SetError(obj, enc, - "Overlong 2 byte UTF-8 sequence detected when " - "encoding string"); - return FALSE; - } - - io += 2; - break; - } - - case 3: { - JSUTF32 in; - JSUTF16 in16; - JSUINT8 in8; - - if (end - io < 2) { - enc->offset += (of - enc->offset); - SetError( - obj, enc, - "Unterminated UTF-8 sequence when encoding string"); - return FALSE; - } - - memcpy(&in16, io, sizeof(JSUTF16)); - memcpy(&in8, io + 2, sizeof(JSUINT8)); -#ifdef __LITTLE_ENDIAN__ - in = (JSUTF32)in16; - in |= in8 << 16; - ucs = ((in & 0x0f) << 12) | ((in & 0x3f00) >> 2) | - ((in & 0x3f0000) >> 16); -#else - in = in16 << 8; - in |= in8; - ucs = - ((in & 0x0f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); -#endif - - if (ucs < 0x800) { - enc->offset += (of - enc->offset); - SetError(obj, enc, - "Overlong 3 byte UTF-8 sequence detected when " - "encoding string"); - return FALSE; - } - - io += 3; - break; - } - case 4: { - JSUTF32 in; - - if (end - io < 3) { - enc->offset += (of - enc->offset); - SetError( - obj, enc, - "Unterminated UTF-8 sequence when encoding string"); - return FALSE; - } - - memcpy(&in, io, sizeof(JSUTF32)); -#ifdef __LITTLE_ENDIAN__ - ucs = ((in & 0x07) << 18) | ((in & 0x3f00) << 4) | - ((in & 0x3f0000) >> 10) | ((in & 0x3f000000) >> 24); -#else - ucs = ((in & 0x07000000) >> 6) | ((in & 0x3f0000) >> 4) | - ((in & 0x3f00) >> 2) | (in & 0x3f); -#endif - if (ucs < 0x10000) { - enc->offset += (of - 
enc->offset); - SetError(obj, enc, - "Overlong 4 byte UTF-8 sequence detected when " - "encoding string"); - return FALSE; - } - - io += 4; - break; - } - - case 5: - case 6: { - enc->offset += (of - enc->offset); - SetError( - obj, enc, - "Unsupported UTF-8 sequence length when encoding string"); - return FALSE; - } - - case 29: { - if (enc->encodeHTMLChars) { - // Fall through to \u00XX case 30 below. - } else { - // Same as case 1 above. - *(of++) = (*io++); - continue; - } - } - - case 30: { - // \uXXXX encode - *(of++) = '\\'; - *(of++) = 'u'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = g_hexChars[(unsigned char)(((*io) & 0xf0) >> 4)]; - *(of++) = g_hexChars[(unsigned char)((*io) & 0x0f)]; - io++; - continue; - } - case 10: - case 12: - case 14: - case 16: - case 18: - case 20: - case 22: - case 24: { - *(of++) = *((char *)(g_escapeChars + utflen + 0)); - *(of++) = *((char *)(g_escapeChars + utflen + 1)); - io++; - continue; - } - // This can never happen, it's here to make L4 VC++ happy - default: { - ucs = 0; - break; - } - } - - /* - If the character is a UTF8 sequence of length > 1 we end up here */ - if (ucs >= 0x10000) { - ucs -= 0x10000; - *(of++) = '\\'; - *(of++) = 'u'; - Buffer_AppendShortHexUnchecked( - of, (unsigned short)(ucs >> 10) + 0xd800); - of += 4; - - *(of++) = '\\'; - *(of++) = 'u'; - Buffer_AppendShortHexUnchecked( - of, (unsigned short)(ucs & 0x3ff) + 0xdc00); - of += 4; - } else { - *(of++) = '\\'; - *(of++) = 'u'; - Buffer_AppendShortHexUnchecked(of, (unsigned short)ucs); - of += 4; - } - } -} - -#define Buffer_Reserve(__enc, __len) \ - if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len)) \ - { \ - Buffer_Realloc((__enc), (__len));\ - } \ - -#define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr; - -INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin, - char *end) { - char aux; - while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux; -} - -void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc) { - if (enc->indent > 0) Buffer_AppendCharUnchecked(enc, '\n'); -} - -// This function could be refactored to only accept enc as an argument, -// but this is a straight vendor from ujson source -void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value) { - int i; - if (enc->indent > 0) { - while (value-- > 0) - for (i = 0; i < enc->indent; i++) - Buffer_AppendCharUnchecked(enc, ' '); - } -} - -void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) { - char *wstr; - JSUINT32 uvalue = (value < 0) ? -value : value; - wstr = enc->offset; - - // Conversion. Number is reversed. - do { - *wstr++ = (char)(48 + (uvalue % 10)); - } while (uvalue /= 10); - if (value < 0) *wstr++ = '-'; - - // Reverse string - strreverse(enc->offset, wstr - 1); - enc->offset += (wstr - (enc->offset)); -} - -void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) { - char *wstr; - JSUINT64 uvalue = (value < 0) ? -value : value; - - wstr = enc->offset; - // Conversion. Number is reversed. 
- - do { - *wstr++ = (char)(48 + (uvalue % 10ULL)); - } while (uvalue /= 10ULL); - if (value < 0) *wstr++ = '-'; - - // Reverse string - strreverse(enc->offset, wstr - 1); - enc->offset += (wstr - (enc->offset)); -} - -int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, - double value) { - /* if input is beyond the thresholds, revert to exponential */ - const double thres_max = (double)1e16 - 1; - const double thres_min = (double)1e-15; - char precision_str[20]; - int count; - double diff = 0.0; - char *str = enc->offset; - char *wstr = str; - unsigned long long whole; - double tmp; - unsigned long long frac; - int neg; - double pow10; - - if (value == HUGE_VAL || value == -HUGE_VAL) { - SetError(obj, enc, "Invalid Inf value when encoding double"); - return FALSE; - } - - if (!(value == value)) { - SetError(obj, enc, "Invalid Nan value when encoding double"); - return FALSE; - } - - /* we'll work in positive values and deal with the - negative sign issue later */ - neg = 0; - if (value < 0) { - neg = 1; - value = -value; - } - - /* - for very large or small numbers switch back to native sprintf for - exponentials. anyone want to write code to replace this? */ - if (value > thres_max || (value != 0.0 && fabs(value) < thres_min)) { - precision_str[0] = '%'; - precision_str[1] = '.'; -#if defined(_WIN32) && defined(_MSC_VER) - sprintf_s(precision_str + 2, sizeof(precision_str) - 2, "%ug", - enc->doublePrecision); - enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str, - neg ? -value : value); -#else - snprintf(precision_str + 2, sizeof(precision_str) - 2, "%ug", - enc->doublePrecision); - enc->offset += snprintf(str, enc->end - enc->offset, precision_str, - neg ? -value : value); -#endif - return TRUE; - } - - pow10 = g_pow10[enc->doublePrecision]; - - whole = (unsigned long long)value; - tmp = (value - whole) * pow10; - frac = (unsigned long long)(tmp); - diff = tmp - frac; - - if (diff > 0.5) { - ++frac; - } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { - /* if halfway, round up if odd, OR - if last digit is 0. That last part is strange */ - ++frac; - } - - // handle rollover, e.g. - // case 0.99 with prec 1 is 1.0 and case 0.95 with prec is 1.0 as well - if (frac >= pow10) { - frac = 0; - ++whole; - } - - if (enc->doublePrecision == 0) { - diff = value - whole; - - if (diff > 0.5) { - /* greater than 0.5, round up, e.g. 1.6 -> 2 */ - ++whole; - } else if (diff == 0.5 && (whole & 1)) { - /* exactly 0.5 and ODD, then round up */ - /* 1.5 -> 2, but 2.5 -> 2 */ - ++whole; - } - - // vvvvvvvvvvvvvvvvvvv Diff from modp_dto2 - } else if (frac) { - count = enc->doublePrecision; - // now do fractional part, as an unsigned number - // we know it is not 0 but we can have leading zeros, these - // should be removed - while (!(frac % 10)) { - --count; - frac /= 10; - } - //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2 - - // now do fractional part, as an unsigned number - do { - --count; - *wstr++ = (char)(48 + (frac % 10)); - } while (frac /= 10); - // add extra 0s - while (count-- > 0) { - *wstr++ = '0'; - } - // add decimal - *wstr++ = '.'; - } else { - *wstr++ = '0'; - *wstr++ = '.'; - } - - // Do whole part. Take care of sign - // conversion. Number is reversed. 
- do { - *wstr++ = (char)(48 + (whole % 10)); - } while (whole /= 10); - - if (neg) { - *wstr++ = '-'; - } - strreverse(str, wstr - 1); - enc->offset += (wstr - (enc->offset)); - - return TRUE; -} - -/* -FIXME: -Handle integration functions returning NULL here */ - -/* -FIXME: -Perhaps implement recursion detection */ - -void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, - size_t cbName) { - const char *value; - char *objName; - int count; - JSOBJ iterObj; - size_t szlen; - JSONTypeContext tc; - tc.encoder = enc; - - if (enc->level > enc->recursionMax) { - SetError(obj, enc, "Maximum recursion level reached"); - return; - } - - /* - This reservation must hold - - length of _name as encoded worst case + - maxLength of double to string OR maxLength of JSLONG to string - */ - - Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName)); - if (enc->errorMsg) { - return; - } - - if (name) { - Buffer_AppendCharUnchecked(enc, '\"'); - - if (enc->forceASCII) { - if (!Buffer_EscapeStringValidated(obj, enc, name, name + cbName)) { - return; - } - } else { - if (!Buffer_EscapeStringUnvalidated(enc, name, name + cbName)) { - return; - } - } - - Buffer_AppendCharUnchecked(enc, '\"'); - - Buffer_AppendCharUnchecked(enc, ':'); -#ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked(enc, ' '); -#endif - } - - enc->beginTypeContext(obj, &tc); - - switch (tc.type) { - case JT_INVALID: { - return; - } - - case JT_ARRAY: { - count = 0; - enc->iterBegin(obj, &tc); - - Buffer_AppendCharUnchecked(enc, '['); - Buffer_AppendIndentNewlineUnchecked(enc); - - while (enc->iterNext(obj, &tc)) { - if (count > 0) { - Buffer_AppendCharUnchecked(enc, ','); -#ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked(buffer, ' '); -#endif - Buffer_AppendIndentNewlineUnchecked(enc); - } - - iterObj = enc->iterGetValue(obj, &tc); - - enc->level++; - Buffer_AppendIndentUnchecked(enc, enc->level); - encode(iterObj, enc, NULL, 0); - count++; - } - - enc->iterEnd(obj, &tc); - Buffer_AppendIndentNewlineUnchecked(enc); - Buffer_AppendIndentUnchecked(enc, enc->level); - Buffer_AppendCharUnchecked(enc, ']'); - break; - } - - case JT_OBJECT: { - count = 0; - enc->iterBegin(obj, &tc); - - Buffer_AppendCharUnchecked(enc, '{'); - Buffer_AppendIndentNewlineUnchecked(enc); - - while (enc->iterNext(obj, &tc)) { - if (count > 0) { - Buffer_AppendCharUnchecked(enc, ','); -#ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked(enc, ' '); -#endif - Buffer_AppendIndentNewlineUnchecked(enc); - } - - iterObj = enc->iterGetValue(obj, &tc); - objName = enc->iterGetName(obj, &tc, &szlen); - - enc->level++; - Buffer_AppendIndentUnchecked(enc, enc->level); - encode(iterObj, enc, objName, szlen); - count++; - } - - enc->iterEnd(obj, &tc); - Buffer_AppendIndentNewlineUnchecked(enc); - Buffer_AppendIndentUnchecked(enc, enc->level); - Buffer_AppendCharUnchecked(enc, '}'); - break; - } - - case JT_LONG: { - Buffer_AppendLongUnchecked(enc, enc->getLongValue(obj, &tc)); - break; - } - - case JT_INT: { - Buffer_AppendIntUnchecked(enc, enc->getIntValue(obj, &tc)); - break; - } - - case JT_TRUE: { - Buffer_AppendCharUnchecked(enc, 't'); - Buffer_AppendCharUnchecked(enc, 'r'); - Buffer_AppendCharUnchecked(enc, 'u'); - Buffer_AppendCharUnchecked(enc, 'e'); - break; - } - - case JT_FALSE: { - Buffer_AppendCharUnchecked(enc, 'f'); - Buffer_AppendCharUnchecked(enc, 'a'); - Buffer_AppendCharUnchecked(enc, 'l'); - Buffer_AppendCharUnchecked(enc, 's'); - Buffer_AppendCharUnchecked(enc, 'e'); - break; - } - - case JT_NULL: { - 
Buffer_AppendCharUnchecked(enc, 'n'); - Buffer_AppendCharUnchecked(enc, 'u'); - Buffer_AppendCharUnchecked(enc, 'l'); - Buffer_AppendCharUnchecked(enc, 'l'); - break; - } - - case JT_DOUBLE: { - if (!Buffer_AppendDoubleUnchecked(obj, enc, - enc->getDoubleValue(obj, &tc))) { - enc->endTypeContext(obj, &tc); - enc->level--; - return; - } - break; - } - - case JT_UTF8: { - value = enc->getStringValue(obj, &tc, &szlen); - if (enc->errorMsg) { - enc->endTypeContext(obj, &tc); - return; - } - Buffer_Reserve(enc, RESERVE_STRING(szlen)); - Buffer_AppendCharUnchecked(enc, '\"'); - - if (enc->forceASCII) { - if (!Buffer_EscapeStringValidated(obj, enc, value, - value + szlen)) { - enc->endTypeContext(obj, &tc); - enc->level--; - return; - } - } else { - if (!Buffer_EscapeStringUnvalidated(enc, value, - value + szlen)) { - enc->endTypeContext(obj, &tc); - enc->level--; - return; - } - } - - Buffer_AppendCharUnchecked(enc, '\"'); - break; - } - - case JT_BIGNUM: { - value = enc->getBigNumStringValue(obj, &tc, &szlen); - - Buffer_Reserve(enc, RESERVE_STRING(szlen)); - if (enc->errorMsg) { - enc->endTypeContext(obj, &tc); - return; - } - - if (enc->forceASCII) { - if (!Buffer_EscapeStringValidated(obj, enc, value, - value + szlen)) { - enc->endTypeContext(obj, &tc); - enc->level--; - return; - } - } else { - if (!Buffer_EscapeStringUnvalidated(enc, value, - value + szlen)) { - enc->endTypeContext(obj, &tc); - enc->level--; - return; - } - } - - break; - } - } - - enc->endTypeContext(obj, &tc); - enc->level--; -} - -char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, - size_t _cbBuffer) { - char *locale; - enc->malloc = enc->malloc ? enc->malloc : malloc; - enc->free = enc->free ? enc->free : free; - enc->realloc = enc->realloc ? enc->realloc : realloc; - enc->errorMsg = NULL; - enc->errorObj = NULL; - enc->level = 0; - - if (enc->recursionMax < 1) { - enc->recursionMax = JSON_MAX_RECURSION_DEPTH; - } - - if (enc->doublePrecision < 0 || - enc->doublePrecision > JSON_DOUBLE_MAX_DECIMALS) { - enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS; - } - - if (_buffer == NULL) { - _cbBuffer = 32768; - enc->start = (char *)enc->malloc(_cbBuffer); - if (!enc->start) { - SetError(obj, enc, "Could not reserve memory block"); - return NULL; - } - enc->heap = 1; - } else { - enc->start = _buffer; - enc->heap = 0; - } - - enc->end = enc->start + _cbBuffer; - enc->offset = enc->start; - - locale = setlocale(LC_NUMERIC, NULL); - if (!locale) { - SetError(NULL, enc, "setlocale call failed"); - return NULL; - } - - if (strcmp(locale, "C")) { - size_t len = strlen(locale) + 1; - char *saved_locale = malloc(len); - if (saved_locale == NULL) { - SetError(NULL, enc, "Could not reserve memory block"); - return NULL; - } - memcpy(saved_locale, locale, len); - setlocale(LC_NUMERIC, "C"); - encode(obj, enc, NULL, 0); - setlocale(LC_NUMERIC, saved_locale); - free(saved_locale); - } else { - encode(obj, enc, NULL, 0); - } - - Buffer_Reserve(enc, 1); - if (enc->errorMsg) { - return NULL; - } - Buffer_AppendCharUnchecked(enc, '\0'); - - return enc->start; -} diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c deleted file mode 100644 index d7086ffba623a..0000000000000 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ /dev/null @@ -1,520 +0,0 @@ -/* -Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the ESN Social Software AB nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) -https://github.com/client9/stringencoders -Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. - -Numeric decoder derived from TCL library -https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms - * Copyright (c) 1988-1993 The Regents of the University of California. - * Copyright (c) 1994 Sun Microsystems, Inc. -*/ - -#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY -#define NO_IMPORT_ARRAY -#define PY_SSIZE_T_CLEAN -#include -#include -#include - -#define PRINTMARK() - -typedef struct __PyObjectDecoder { - JSONObjectDecoder dec; - - void *npyarr; // Numpy context buffer - void *npyarr_addr; // Ref to npyarr ptr to track DECREF calls - npy_intp curdim; // Current array dimension - - PyArray_Descr *dtype; -} PyObjectDecoder; - -typedef struct __NpyArrContext { - PyObject *ret; - PyObject *labels[2]; - PyArray_Dims shape; - - PyObjectDecoder *dec; - - npy_intp i; - npy_intp elsize; - npy_intp elcount; -} NpyArrContext; - -// Numpy handling based on numpy internal code, specifically the function -// PyArray_FromIter. - -// numpy related functions are inter-dependent so declare them all here, -// to ensure the compiler catches any errors - -// standard numpy array handling -JSOBJ Object_npyNewArray(void *prv, void *decoder); -JSOBJ Object_npyEndArray(void *prv, JSOBJ obj); -int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value); - -// for more complex dtypes (object and string) fill a standard Python list -// and convert to a numpy array when done. 
-JSOBJ Object_npyNewArrayList(void *prv, void *decoder); -JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj); -int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value); - -// free the numpy context buffer -void Npy_releaseContext(NpyArrContext *npyarr) { - PRINTMARK(); - if (npyarr) { - if (npyarr->shape.ptr) { - PyObject_Free(npyarr->shape.ptr); - } - if (npyarr->dec) { - npyarr->dec->npyarr = NULL; - npyarr->dec->curdim = 0; - } - Py_XDECREF(npyarr->labels[0]); - Py_XDECREF(npyarr->labels[1]); - Py_XDECREF(npyarr->ret); - PyObject_Free(npyarr); - } -} - -JSOBJ Object_npyNewArray(void *prv, void *_decoder) { - NpyArrContext *npyarr; - PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; - PRINTMARK(); - if (decoder->curdim <= 0) { - // start of array - initialise the context buffer - npyarr = decoder->npyarr = PyObject_Malloc(sizeof(NpyArrContext)); - decoder->npyarr_addr = npyarr; - - if (!npyarr) { - PyErr_NoMemory(); - return NULL; - } - - npyarr->dec = decoder; - npyarr->labels[0] = npyarr->labels[1] = NULL; - - npyarr->shape.ptr = PyObject_Malloc(sizeof(npy_intp) * NPY_MAXDIMS); - npyarr->shape.len = 1; - npyarr->ret = NULL; - - npyarr->elsize = 0; - npyarr->elcount = 4; - npyarr->i = 0; - } else { - // starting a new dimension continue the current array (and reshape - // after) - npyarr = (NpyArrContext *)decoder->npyarr; - if (decoder->curdim >= npyarr->shape.len) { - npyarr->shape.len++; - } - } - - npyarr->shape.ptr[decoder->curdim] = 0; - decoder->curdim++; - return npyarr; -} - -PyObject *Npy_returnLabelled(NpyArrContext *npyarr) { - PyObject *ret = npyarr->ret; - npy_intp i; - - if (npyarr->labels[0] || npyarr->labels[1]) { - // finished decoding, build tuple with values and labels - ret = PyTuple_New(npyarr->shape.len + 1); - for (i = 0; i < npyarr->shape.len; i++) { - if (npyarr->labels[i]) { - PyTuple_SET_ITEM(ret, i + 1, npyarr->labels[i]); - npyarr->labels[i] = NULL; - } else { - Py_INCREF(Py_None); - PyTuple_SET_ITEM(ret, i + 1, Py_None); - } - } - PyTuple_SET_ITEM(ret, 0, npyarr->ret); - } - - return ret; -} - -JSOBJ Object_npyEndArray(void *prv, JSOBJ obj) { - PyObject *ret; - char *new_data; - NpyArrContext *npyarr = (NpyArrContext *)obj; - int emptyType = NPY_DEFAULT_TYPE; - npy_intp i; - PRINTMARK(); - if (!npyarr) { - return NULL; - } - - ret = npyarr->ret; - i = npyarr->i; - - npyarr->dec->curdim--; - - if (i == 0 || !npyarr->ret) { - // empty array would not have been initialised so do it now. 
- if (npyarr->dec->dtype) { - emptyType = npyarr->dec->dtype->type_num; - } - npyarr->ret = ret = - PyArray_EMPTY(npyarr->shape.len, npyarr->shape.ptr, emptyType, 0); - } else if (npyarr->dec->curdim <= 0) { - // realloc to final size - new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * npyarr->elsize); - if (new_data == NULL) { - PyErr_NoMemory(); - Npy_releaseContext(npyarr); - return NULL; - } - ((PyArrayObject *)ret)->data = (void *)new_data; - // PyArray_BYTES(ret) = new_data; - } - - if (npyarr->dec->curdim <= 0) { - // finished decoding array, reshape if necessary - if (npyarr->shape.len > 1) { - npyarr->ret = PyArray_Newshape((PyArrayObject *)ret, &npyarr->shape, - NPY_ANYORDER); - Py_DECREF(ret); - } - - ret = Npy_returnLabelled(npyarr); - - npyarr->ret = NULL; - Npy_releaseContext(npyarr); - } - - return ret; -} - -int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { - PyObject *type; - PyArray_Descr *dtype; - npy_intp i; - char *new_data, *item; - NpyArrContext *npyarr = (NpyArrContext *)obj; - PRINTMARK(); - if (!npyarr) { - return 0; - } - - i = npyarr->i; - - npyarr->shape.ptr[npyarr->dec->curdim - 1]++; - - if (PyArray_Check((PyObject *)value)) { - // multidimensional array, keep decoding values. - return 1; - } - - if (!npyarr->ret) { - // Array not initialised yet. - // We do it here so we can 'sniff' the data type if none was provided - if (!npyarr->dec->dtype) { - type = PyObject_Type(value); - if (!PyArray_DescrConverter(type, &dtype)) { - Py_DECREF(type); - goto fail; - } - Py_INCREF(dtype); - Py_DECREF(type); - } else { - dtype = PyArray_DescrNew(npyarr->dec->dtype); - } - - // If it's an object or string then fill a Python list and subsequently - // convert. Otherwise we would need to somehow mess about with - // reference counts when renewing memory. - npyarr->elsize = dtype->elsize; - if (PyDataType_REFCHK(dtype) || npyarr->elsize == 0) { - Py_XDECREF(dtype); - - if (npyarr->dec->curdim > 1) { - PyErr_SetString(PyExc_ValueError, - "Cannot decode multidimensional arrays with " - "variable length elements to numpy"); - goto fail; - } - npyarr->elcount = 0; - npyarr->ret = PyList_New(0); - if (!npyarr->ret) { - goto fail; - } - ((JSONObjectDecoder *)npyarr->dec)->newArray = - Object_npyNewArrayList; - ((JSONObjectDecoder *)npyarr->dec)->arrayAddItem = - Object_npyArrayListAddItem; - ((JSONObjectDecoder *)npyarr->dec)->endArray = - Object_npyEndArrayList; - return Object_npyArrayListAddItem(prv, obj, value); - } - - npyarr->ret = PyArray_NewFromDescr( - &PyArray_Type, dtype, 1, &npyarr->elcount, NULL, NULL, 0, NULL); - - if (!npyarr->ret) { - goto fail; - } - } - - if (i >= npyarr->elcount) { - // Grow PyArray_DATA(ret): - // this is similar for the strategy for PyListObject, but we use - // 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... - if (npyarr->elsize == 0) { - PyErr_SetString(PyExc_ValueError, - "Cannot decode multidimensional arrays with " - "variable length elements to numpy"); - goto fail; - } - - npyarr->elcount = (i >> 1) + (i < 4 ? 
4 : 2) + i; - if (npyarr->elcount <= NPY_MAX_INTP / npyarr->elsize) { - new_data = PyDataMem_RENEW(PyArray_DATA(npyarr->ret), - npyarr->elcount * npyarr->elsize); - } else { - PyErr_NoMemory(); - goto fail; - } - ((PyArrayObject *)npyarr->ret)->data = (void *)new_data; - - // PyArray_BYTES(npyarr->ret) = new_data; - } - - PyArray_DIMS(npyarr->ret)[0] = i + 1; - - if ((item = PyArray_GETPTR1(npyarr->ret, i)) == NULL || - PyArray_SETITEM(npyarr->ret, item, value) == -1) { - goto fail; - } - - Py_DECREF((PyObject *)value); - npyarr->i++; - return 1; - -fail: - - Npy_releaseContext(npyarr); - return 0; -} - -JSOBJ Object_npyNewArrayList(void *prv, void *_decoder) { - PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; - PRINTMARK(); - PyErr_SetString( - PyExc_ValueError, - "nesting not supported for object or variable length dtypes"); - Npy_releaseContext(decoder->npyarr); - return NULL; -} - -JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj) { - PyObject *list, *ret; - NpyArrContext *npyarr = (NpyArrContext *)obj; - PRINTMARK(); - if (!npyarr) { - return NULL; - } - - // convert decoded list to numpy array - list = (PyObject *)npyarr->ret; - npyarr->ret = PyArray_FROM_O(list); - - ret = Npy_returnLabelled(npyarr); - npyarr->ret = list; - - ((JSONObjectDecoder *)npyarr->dec)->newArray = Object_npyNewArray; - ((JSONObjectDecoder *)npyarr->dec)->arrayAddItem = Object_npyArrayAddItem; - ((JSONObjectDecoder *)npyarr->dec)->endArray = Object_npyEndArray; - Npy_releaseContext(npyarr); - return ret; -} - -int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value) { - NpyArrContext *npyarr = (NpyArrContext *)obj; - PRINTMARK(); - if (!npyarr) { - return 0; - } - PyList_Append((PyObject *)npyarr->ret, value); - Py_DECREF((PyObject *)value); - npyarr->elcount++; - return 1; -} - -int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { - int ret = PyDict_SetItem(obj, name, value); - Py_DECREF((PyObject *)name); - Py_DECREF((PyObject *)value); - return ret == 0 ? 1 : 0; -} - -int Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { - int ret = PyList_Append(obj, value); - Py_DECREF((PyObject *)value); - return ret == 0 ? 
1 : 0; -} - -JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end) { - return PyUnicode_FromWideChar(start, (end - start)); -} - -JSOBJ Object_newTrue(void *prv) { Py_RETURN_TRUE; } - -JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; } - -JSOBJ Object_newNull(void *prv) { Py_RETURN_NONE; } - -JSOBJ Object_newPosInf(void *prv) { return PyFloat_FromDouble(Py_HUGE_VAL); } - -JSOBJ Object_newNegInf(void *prv) { return PyFloat_FromDouble(-Py_HUGE_VAL); } - -JSOBJ Object_newObject(void *prv, void *decoder) { return PyDict_New(); } - -JSOBJ Object_endObject(void *prv, JSOBJ obj) { return obj; } - -JSOBJ Object_newArray(void *prv, void *decoder) { return PyList_New(0); } - -JSOBJ Object_endArray(void *prv, JSOBJ obj) { return obj; } - -JSOBJ Object_newInteger(void *prv, JSINT32 value) { - return PyLong_FromLong((long)value); -} - -JSOBJ Object_newLong(void *prv, JSINT64 value) { - return PyLong_FromLongLong(value); -} - -JSOBJ Object_newUnsignedLong(void *prv, JSUINT64 value) { - return PyLong_FromUnsignedLongLong(value); -} - -JSOBJ Object_newDouble(void *prv, double value) { - return PyFloat_FromDouble(value); -} - -static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) { - PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; - if (obj != decoder->npyarr_addr) { - Py_XDECREF(((PyObject *)obj)); - } -} - -static char *g_kwlist[] = {"obj", "precise_float", - "labelled", "dtype", NULL}; - -PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) { - PyObject *ret; - PyObject *sarg; - PyObject *arg; - PyObject *opreciseFloat = NULL; - JSONObjectDecoder *decoder; - PyObjectDecoder pyDecoder; - PyArray_Descr *dtype = NULL; - int labelled = 0; - - JSONObjectDecoder dec = { - Object_newString, Object_objectAddKey, Object_arrayAddItem, - Object_newTrue, Object_newFalse, Object_newNull, - Object_newPosInf, Object_newNegInf, Object_newObject, - Object_endObject, Object_newArray, Object_endArray, - Object_newInteger, Object_newLong, Object_newUnsignedLong, - Object_newDouble, - Object_releaseObject, PyObject_Malloc, PyObject_Free, - PyObject_Realloc}; - - dec.preciseFloat = 0; - dec.prv = NULL; - - pyDecoder.dec = dec; - pyDecoder.curdim = 0; - pyDecoder.npyarr = NULL; - pyDecoder.npyarr_addr = NULL; - - decoder = (JSONObjectDecoder *)&pyDecoder; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiiO&", g_kwlist, &arg, - &opreciseFloat, &labelled, - PyArray_DescrConverter2, &dtype)) { - Npy_releaseContext(pyDecoder.npyarr); - return NULL; - } - - if (opreciseFloat && PyObject_IsTrue(opreciseFloat)) { - decoder->preciseFloat = 1; - } - - if (PyBytes_Check(arg)) { - sarg = arg; - } else if (PyUnicode_Check(arg)) { - sarg = PyUnicode_AsUTF8String(arg); - if (sarg == NULL) { - // Exception raised above us by codec according to docs - return NULL; - } - } else { - PyErr_Format(PyExc_TypeError, "Expected 'str' or 'bytes'"); - return NULL; - } - - decoder->errorStr = NULL; - decoder->errorOffset = NULL; - - ret = JSON_DecodeObject(decoder, PyBytes_AS_STRING(sarg), - PyBytes_GET_SIZE(sarg)); - - if (sarg != arg) { - Py_DECREF(sarg); - } - - if (PyErr_Occurred()) { - if (ret) { - Py_DECREF((PyObject *)ret); - } - Npy_releaseContext(pyDecoder.npyarr); - return NULL; - } - - if (decoder->errorStr) { - /* - FIXME: It's possible to give a much nicer error message here with actual - failing element in input etc*/ - - PyErr_Format(PyExc_ValueError, "%s", decoder->errorStr); - - if (ret) { - Py_DECREF((PyObject *)ret); - } - Npy_releaseContext(pyDecoder.npyarr); - - return 
NULL; - } - - return ret; -} diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c deleted file mode 100644 index 1b8ba8f3f7e6c..0000000000000 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ /dev/null @@ -1,2135 +0,0 @@ -/* -Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -* Neither the name of the ESN Social Software AB nor the -names of its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) -https://github.com/client9/stringencoders -Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights -reserved. - -Numeric decoder derived from TCL library -https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms -* Copyright (c) 1988-1993 The Regents of the University of California. -* Copyright (c) 1994 Sun Microsystems, Inc. 
-*/ - -#define PY_SSIZE_T_CLEAN -#include -#include - -#define NO_IMPORT_ARRAY -#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY -#include -#include -#include -#include -#include -#include "datetime.h" -#include "pd_datetime.h" - -npy_int64 get_nat(void) { return NPY_MIN_INT64; } - -typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti, - size_t *_outLen); - -int object_is_decimal_type(PyObject *obj); -int object_is_dataframe_type(PyObject *obj); -int object_is_series_type(PyObject *obj); -int object_is_index_type(PyObject *obj); -int object_is_nat_type(PyObject *obj); -int object_is_na_type(PyObject *obj); - -typedef struct __NpyArrContext { - PyObject *array; - char *dataptr; - int curdim; // current dimension in array's order - int stridedim; // dimension we are striding over - int inc; // stride dimension increment (+/- 1) - npy_intp dim; - npy_intp stride; - npy_intp ndim; - npy_intp index[NPY_MAXDIMS]; - int type_num; - PyArray_GetItemFunc *getitem; - - char **rowLabels; - char **columnLabels; -} NpyArrContext; - -typedef struct __PdBlockContext { - int colIdx; - int ncols; - int transpose; - - NpyArrContext **npyCtxts; // NpyArrContext for each column -} PdBlockContext; - -typedef struct __TypeContext { - JSPFN_ITERBEGIN iterBegin; - JSPFN_ITEREND iterEnd; - JSPFN_ITERNEXT iterNext; - JSPFN_ITERGETNAME iterGetName; - JSPFN_ITERGETVALUE iterGetValue; - PFN_PyTypeToUTF8 PyTypeToUTF8; - PyObject *newObj; - PyObject *dictObj; - Py_ssize_t index; - Py_ssize_t size; - PyObject *itemValue; - PyObject *itemName; - PyObject *attrList; - PyObject *iterator; - - double doubleValue; - JSINT64 longValue; - - char *cStr; - NpyArrContext *npyarr; - PdBlockContext *pdblock; - int transpose; - char **rowLabels; - char **columnLabels; - npy_intp rowLabelsLen; - npy_intp columnLabelsLen; -} TypeContext; - -typedef struct __PyObjectEncoder { - JSONObjectEncoder enc; - - // pass through the NpyArrContext when encoding multi-dimensional arrays - NpyArrContext *npyCtxtPassthru; - - // pass through the PdBlockContext when encoding blocks - PdBlockContext *blkCtxtPassthru; - - // pass-through to encode numpy data directly - int npyType; - void *npyValue; - - int datetimeIso; - NPY_DATETIMEUNIT datetimeUnit; - - // output format style for pandas data types - int outputFormat; - int originalOutputFormat; - - PyObject *defaultHandler; -} PyObjectEncoder; - -#define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv)) - -enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES }; - -int PdBlock_iterNext(JSOBJ, JSONTypeContext *); - -static TypeContext *createTypeContext(void) { - TypeContext *pc; - - pc = PyObject_Malloc(sizeof(TypeContext)); - if (!pc) { - PyErr_NoMemory(); - return NULL; - } - pc->newObj = NULL; - pc->dictObj = NULL; - pc->itemValue = NULL; - pc->itemName = NULL; - pc->attrList = NULL; - pc->index = 0; - pc->size = 0; - pc->longValue = 0; - pc->doubleValue = 0.0; - pc->cStr = NULL; - pc->npyarr = NULL; - pc->pdblock = NULL; - pc->rowLabels = NULL; - pc->columnLabels = NULL; - pc->transpose = 0; - pc->rowLabelsLen = 0; - pc->columnLabelsLen = 0; - - return pc; -} - -static PyObject *get_values(PyObject *obj) { - PyObject *values = NULL; - - if (object_is_index_type(obj) || object_is_series_type(obj)) { - // The special cases to worry about are dt64tz and category[dt64tz]. - // In both cases we want the UTC-localized datetime64 ndarray, - // without going through and object array of Timestamps. 
- if (PyObject_HasAttrString(obj, "tz")) { - PyObject *tz = PyObject_GetAttrString(obj, "tz"); - if (tz != Py_None) { - // Go through object array if we have dt64tz, since tz info will - // be lost if values is used directly. - Py_DECREF(tz); - values = PyObject_CallMethod(obj, "__array__", NULL); - return values; - } - Py_DECREF(tz); - } - values = PyObject_GetAttrString(obj, "values"); - if (values == NULL) { - // Clear so we can subsequently try another method - PyErr_Clear(); - } else if (PyObject_HasAttrString(values, "__array__")) { - // We may have gotten a Categorical or Sparse array so call np.array - PyObject *array_values = PyObject_CallMethod(values, "__array__", - NULL); - Py_DECREF(values); - values = array_values; - } else if (!PyArray_CheckExact(values)) { - // Didn't get a numpy array, so keep trying - Py_DECREF(values); - values = NULL; - } - } - - if (values == NULL) { - PyObject *typeRepr = PyObject_Repr((PyObject *)Py_TYPE(obj)); - PyObject *repr; - if (PyObject_HasAttrString(obj, "dtype")) { - PyObject *dtype = PyObject_GetAttrString(obj, "dtype"); - repr = PyObject_Repr(dtype); - Py_DECREF(dtype); - } else { - repr = PyUnicode_FromString(""); - } - - PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet", - repr, typeRepr); - Py_DECREF(repr); - Py_DECREF(typeRepr); - - return NULL; - } - - return values; -} - -static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) { - PyObject *tmp = PyObject_GetAttrString(obj, attr); - PyObject *ret; - - if (tmp == 0) { - return 0; - } - ret = PyObject_GetAttrString(tmp, subAttr); - Py_DECREF(tmp); - - return ret; -} - -static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { - PyObject *tmp = PyObject_GetAttrString(obj, attr); - Py_ssize_t ret; - - if (tmp == 0) { - return 0; - } - ret = PyObject_Length(tmp); - Py_DECREF(tmp); - - if (ret == -1) { - return 0; - } - - return ret; -} - -static int is_simple_frame(PyObject *obj) { - PyObject *mgr = PyObject_GetAttrString(obj, "_mgr"); - if (!mgr) { - return 0; - } - int ret; - if (PyObject_HasAttrString(mgr, "blocks")) { - ret = (get_attr_length(mgr, "blocks") <= 1); - } else { - ret = 0; - } - - Py_DECREF(mgr); - return ret; -} - -static npy_int64 get_long_attr(PyObject *o, const char *attr) { - // NB we are implicitly assuming that o is a Timedelta or Timestamp, or NaT - - npy_int64 long_val; - PyObject *value = PyObject_GetAttrString(o, attr); - long_val = - (PyLong_Check(value) ? PyLong_AsLongLong(value) : PyLong_AsLong(value)); - - Py_DECREF(value); - - if (object_is_nat_type(o)) { - // i.e. 
o is NaT, long_val will be NPY_MIN_INT64 - return long_val; - } - - // ensure we are in nanoseconds, similar to Timestamp._as_creso or _as_unit - PyObject* reso = PyObject_GetAttrString(o, "_creso"); - if (!PyLong_Check(reso)) { - // https://github.com/pandas-dev/pandas/pull/49034#discussion_r1023165139 - Py_DECREF(reso); - return -1; - } - - long cReso = PyLong_AsLong(reso); - Py_DECREF(reso); - if (cReso == -1 && PyErr_Occurred()) { - return -1; - } - - if (cReso == NPY_FR_us) { - long_val = long_val * 1000L; - } else if (cReso == NPY_FR_ms) { - long_val = long_val * 1000000L; - } else if (cReso == NPY_FR_s) { - long_val = long_val * 1000000000L; - } - - return long_val; -} - -static npy_float64 total_seconds(PyObject *td) { - npy_float64 double_val; - PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); - double_val = PyFloat_AS_DOUBLE(value); - Py_DECREF(value); - return double_val; -} - -static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), - size_t *_outLen) { - PyObject *obj = (PyObject *)_obj; - *_outLen = PyBytes_GET_SIZE(obj); - return PyBytes_AS_STRING(obj); -} - -static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, - size_t *_outLen) { - char *encoded = (char *)PyUnicode_AsUTF8AndSize(_obj, - (Py_ssize_t *)_outLen); - if (encoded == NULL) { - /* Something went wrong. - Set errorMsg(to tell encoder to stop), - and let Python exception propagate. */ - JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder; - enc->errorMsg = "Encoding failed."; - } - return encoded; -} - -/* JSON callback. returns a char* and mutates the pointer to *len */ -static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), - JSONTypeContext *tc, size_t *len) { - NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, base, len); - return GET_TC(tc)->cStr; -} - -/* JSON callback. 
returns a char* and mutates the pointer to *len */ -static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused), - JSONTypeContext *tc, size_t *len) { - GET_TC(tc)->cStr = int64ToIsoDuration(GET_TC(tc)->longValue, len); - return GET_TC(tc)->cStr; -} - -/* JSON callback */ -static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, - size_t *len) { - if (!PyDate_Check(obj)) { - PyErr_SetString(PyExc_TypeError, "Expected date object"); - return NULL; - } - - NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - return PyDateTimeToIso(obj, base, len); -} - -static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { - PyObject *obj = (PyObject *)_obj; - PyObject *str; - PyObject *tmp; - - str = PyObject_CallMethod(obj, "isoformat", NULL); - if (str == NULL) { - *outLen = 0; - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "Failed to convert time"); - } - ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; - return NULL; - } - if (PyUnicode_Check(str)) { - tmp = str; - str = PyUnicode_AsUTF8String(str); - Py_DECREF(tmp); - } - - GET_TC(tc)->newObj = str; - - *outLen = PyBytes_GET_SIZE(str); - char *outValue = PyBytes_AS_STRING(str); - return outValue; -} - -//============================================================================= -// Numpy array iteration functions -//============================================================================= - -static void NpyArr_freeItemValue(JSOBJ Py_UNUSED(_obj), JSONTypeContext *tc) { - if (GET_TC(tc)->npyarr && - GET_TC(tc)->itemValue != GET_TC(tc)->npyarr->array) { - Py_XDECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } -} - -int NpyArr_iterNextNone(JSOBJ Py_UNUSED(_obj), JSONTypeContext *Py_UNUSED(tc)) { - return 0; -} - -void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { - PyArrayObject *obj; - NpyArrContext *npyarr; - - if (GET_TC(tc)->newObj) { - obj = (PyArrayObject *)GET_TC(tc)->newObj; - } else { - obj = (PyArrayObject *)_obj; - } - - npyarr = PyObject_Malloc(sizeof(NpyArrContext)); - GET_TC(tc)->npyarr = npyarr; - - if (!npyarr) { - PyErr_NoMemory(); - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - - npyarr->array = (PyObject *)obj; - npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem; - npyarr->dataptr = PyArray_DATA(obj); - npyarr->ndim = PyArray_NDIM(obj) - 1; - npyarr->curdim = 0; - npyarr->type_num = PyArray_DESCR(obj)->type_num; - - if (GET_TC(tc)->transpose) { - npyarr->dim = PyArray_DIM(obj, npyarr->ndim); - npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); - npyarr->stridedim = npyarr->ndim; - npyarr->index[npyarr->ndim] = 0; - npyarr->inc = -1; - } else { - npyarr->dim = PyArray_DIM(obj, 0); - npyarr->stride = PyArray_STRIDE(obj, 0); - npyarr->stridedim = 0; - npyarr->index[0] = 0; - npyarr->inc = 1; - } - - npyarr->columnLabels = GET_TC(tc)->columnLabels; - npyarr->rowLabels = GET_TC(tc)->rowLabels; -} - -void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - - if (npyarr) { - NpyArr_freeItemValue(obj, tc); - PyObject_Free(npyarr); - } -} - -void NpyArrPassThru_iterBegin(JSOBJ Py_UNUSED(obj), - JSONTypeContext *Py_UNUSED(tc)) {} - -void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - // finished this dimension, reset the data pointer - npyarr->curdim--; - npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; - npyarr->stridedim -= npyarr->inc; - npyarr->dim = PyArray_DIM(npyarr->array, 
npyarr->stridedim); - npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); - npyarr->dataptr += npyarr->stride; - - NpyArr_freeItemValue(obj, tc); -} - -int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - - if (PyErr_Occurred()) { - return 0; - } - - if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { - return 0; - } - - NpyArr_freeItemValue(obj, tc); - - if (PyArray_ISDATETIME(npyarr->array)) { - GET_TC(tc)->itemValue = obj; - Py_INCREF(obj); - ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array); - ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; - ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; - } else { - GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); - } - - npyarr->dataptr += npyarr->stride; - npyarr->index[npyarr->stridedim]++; - return 1; -} - -int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - - if (PyErr_Occurred()) { - return 0; - } - - if (npyarr->curdim >= npyarr->ndim || - npyarr->index[npyarr->stridedim] >= npyarr->dim) { - // innermost dimension, start retrieving item values - GET_TC(tc)->iterNext = NpyArr_iterNextItem; - return NpyArr_iterNextItem(_obj, tc); - } - - // dig a dimension deeper - npyarr->index[npyarr->stridedim]++; - - npyarr->curdim++; - npyarr->stridedim += npyarr->inc; - npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); - npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); - npyarr->index[npyarr->stridedim] = 0; - - ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; - GET_TC(tc)->itemValue = npyarr->array; - return 1; -} - -JSOBJ NpyArr_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - npy_intp idx; - char *cStr; - - if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { - idx = npyarr->index[npyarr->stridedim] - 1; - cStr = npyarr->columnLabels[idx]; - } else { - idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; - cStr = npyarr->rowLabels[idx]; - } - - *outLen = strlen(cStr); - - return cStr; -} - -//============================================================================= -// Pandas block iteration functions -// -// Serialises a DataFrame column by column to avoid unnecessary data copies and -// more representative serialisation when dealing with mixed dtypes. -// -// Uses a dedicated NpyArrContext for each column. 
-//============================================================================= - -void PdBlockPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - - if (blkCtxt->transpose) { - blkCtxt->colIdx++; - } else { - blkCtxt->colIdx = 0; - } - - NpyArr_freeItemValue(obj, tc); -} - -int PdBlock_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - - if (blkCtxt->colIdx >= blkCtxt->ncols) { - return 0; - } - - GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; - blkCtxt->colIdx++; - return NpyArr_iterNextItem(obj, tc); -} - -char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; - npy_intp idx; - char *cStr; - - if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { - idx = blkCtxt->colIdx - 1; - cStr = npyarr->columnLabels[idx]; - } else { - idx = GET_TC(tc)->iterNext != PdBlock_iterNext - ? npyarr->index[npyarr->stridedim - npyarr->inc] - 1 - : npyarr->index[npyarr->stridedim]; - - cStr = npyarr->rowLabels[idx]; - } - - *outLen = strlen(cStr); - return cStr; -} - -char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; - npy_intp idx; - char *cStr; - - if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { - idx = npyarr->index[npyarr->stridedim] - 1; - cStr = npyarr->columnLabels[idx]; - } else { - idx = blkCtxt->colIdx; - cStr = npyarr->rowLabels[idx]; - } - - *outLen = strlen(cStr); - return cStr; -} - -int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - NpyArrContext *npyarr; - - if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { - return 0; - } - - if (blkCtxt->transpose) { - if (blkCtxt->colIdx >= blkCtxt->ncols) { - return 0; - } - } else { - npyarr = blkCtxt->npyCtxts[0]; - if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { - return 0; - } - } - - ((PyObjectEncoder *)tc->encoder)->blkCtxtPassthru = blkCtxt; - GET_TC(tc)->itemValue = obj; - - return 1; -} - -void PdBlockPassThru_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - - if (blkCtxt->transpose) { - // if transposed we exhaust each column before moving to the next - GET_TC(tc)->iterNext = NpyArr_iterNextItem; - GET_TC(tc)->iterGetName = PdBlock_iterGetName_Transpose; - GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; - } -} - -void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { - PyObject *obj, *values, *arrays, *array; - PdBlockContext *blkCtxt; - NpyArrContext *npyarr; - Py_ssize_t i; - - obj = (PyObject *)_obj; - - GET_TC(tc)->iterGetName = GET_TC(tc)->transpose - ? 
PdBlock_iterGetName_Transpose - : PdBlock_iterGetName; - - blkCtxt = PyObject_Malloc(sizeof(PdBlockContext)); - if (!blkCtxt) { - PyErr_NoMemory(); - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - GET_TC(tc)->pdblock = blkCtxt; - - blkCtxt->colIdx = 0; - blkCtxt->transpose = GET_TC(tc)->transpose; - blkCtxt->ncols = get_attr_length(obj, "columns"); - - if (blkCtxt->ncols == 0) { - blkCtxt->npyCtxts = NULL; - - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - - blkCtxt->npyCtxts = - PyObject_Malloc(sizeof(NpyArrContext *) * blkCtxt->ncols); - if (!blkCtxt->npyCtxts) { - PyErr_NoMemory(); - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - - arrays = get_sub_attr(obj, "_mgr", "column_arrays"); - if (!arrays) { - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - - for (i = 0; i < PyObject_Length(arrays); i++) { - array = PyList_GET_ITEM(arrays, i); - if (!array) { - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - goto ARR_RET; - } - - // ensure we have a numpy array (i.e. np.asarray) - values = PyObject_CallMethod(array, "__array__", NULL); - if ((!values) || (!PyArray_CheckExact(values))) { - // Didn't get a numpy array - ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - goto ARR_RET; - } - - GET_TC(tc)->newObj = values; - - // init a dedicated context for this column - NpyArr_iterBegin(obj, tc); - npyarr = GET_TC(tc)->npyarr; - - GET_TC(tc)->itemValue = NULL; - ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL; - - blkCtxt->npyCtxts[i] = npyarr; - GET_TC(tc)->newObj = NULL; - } - GET_TC(tc)->npyarr = blkCtxt->npyCtxts[0]; - goto ARR_RET; - -ARR_RET: - Py_DECREF(arrays); -} - -void PdBlock_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - PdBlockContext *blkCtxt; - NpyArrContext *npyarr; - int i; - - GET_TC(tc)->itemValue = NULL; - npyarr = GET_TC(tc)->npyarr; - - blkCtxt = GET_TC(tc)->pdblock; - - if (blkCtxt) { - for (i = 0; i < blkCtxt->ncols; i++) { - npyarr = blkCtxt->npyCtxts[i]; - if (npyarr) { - if (npyarr->array) { - Py_DECREF(npyarr->array); - npyarr->array = NULL; - } - - GET_TC(tc)->npyarr = npyarr; - NpyArr_iterEnd(obj, tc); - - blkCtxt->npyCtxts[i] = NULL; - } - } - - if (blkCtxt->npyCtxts) { - PyObject_Free(blkCtxt->npyCtxts); - } - PyObject_Free(blkCtxt); - } -} - -//============================================================================= -// Tuple iteration functions -// itemValue is borrowed reference, no ref counting -//============================================================================= -void Tuple_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->index = 0; - GET_TC(tc)->size = PyTuple_GET_SIZE((PyObject *)obj); - GET_TC(tc)->itemValue = NULL; -} - -int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) { - PyObject *item; - - if (GET_TC(tc)->index >= GET_TC(tc)->size) { - return 0; - } - - item = PyTuple_GET_ITEM(obj, GET_TC(tc)->index); - - GET_TC(tc)->itemValue = item; - GET_TC(tc)->index++; - return 1; -} - -void Tuple_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} - -JSOBJ Tuple_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), - size_t *Py_UNUSED(outLen)) { - return NULL; -} - -//============================================================================= -// Set iteration functions -// itemValue is borrowed reference, no ref counting -//============================================================================= 
-void Set_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->itemValue = NULL; - GET_TC(tc)->iterator = PyObject_GetIter(obj); -} - -int Set_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - PyObject *item; - - if (GET_TC(tc)->itemValue) { - Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } - - item = PyIter_Next(GET_TC(tc)->iterator); - - if (item == NULL) { - return 0; - } - - GET_TC(tc)->itemValue = item; - return 1; -} - -void Set_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - if (GET_TC(tc)->itemValue) { - Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } - - if (GET_TC(tc)->iterator) { - Py_DECREF(GET_TC(tc)->iterator); - GET_TC(tc)->iterator = NULL; - } -} - -JSOBJ Set_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -char *Set_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), - size_t *Py_UNUSED(outLen)) { - return NULL; -} - -//============================================================================= -// Dir iteration functions -// itemName ref is borrowed from PyObject_Dir (attrList). No refcount -// itemValue ref is from PyObject_GetAttr. Ref counted -//============================================================================= -void Dir_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->attrList = PyObject_Dir(obj); - GET_TC(tc)->index = 0; - GET_TC(tc)->size = PyList_GET_SIZE(GET_TC(tc)->attrList); -} - -void Dir_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - if (GET_TC(tc)->itemValue) { - Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } - - if (GET_TC(tc)->itemName) { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = NULL; - } - - Py_DECREF((PyObject *)GET_TC(tc)->attrList); -} - -int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { - PyObject *obj = (PyObject *)_obj; - PyObject *itemValue = GET_TC(tc)->itemValue; - PyObject *itemName = GET_TC(tc)->itemName; - PyObject *attr; - PyObject *attrName; - char *attrStr; - - if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { - return 0; - } - - if (itemValue) { - Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = itemValue = NULL; - } - - if (itemName) { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = itemName = NULL; - } - - for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index++) { - attrName = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); - attr = PyUnicode_AsUTF8String(attrName); - attrStr = PyBytes_AS_STRING(attr); - - if (attrStr[0] == '_') { - Py_DECREF(attr); - continue; - } - - itemValue = PyObject_GetAttr(obj, attrName); - if (itemValue == NULL) { - PyErr_Clear(); - Py_DECREF(attr); - continue; - } - - if (PyCallable_Check(itemValue)) { - Py_DECREF(itemValue); - Py_DECREF(attr); - continue; - } - - GET_TC(tc)->itemName = itemName; - GET_TC(tc)->itemValue = itemValue; - - itemName = attr; - break; - } - - if (itemName == NULL) { - GET_TC(tc)->index = GET_TC(tc)->size; - GET_TC(tc)->itemValue = NULL; - return 0; - } - - GET_TC(tc)->itemName = itemName; - GET_TC(tc)->itemValue = itemValue; - GET_TC(tc)->index++; - - return 1; -} - -JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); - return PyBytes_AS_STRING(GET_TC(tc)->itemName); -} - 
-//============================================================================= -// List iteration functions -// itemValue is borrowed from object (which is list). No refcounting -//============================================================================= -void List_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->index = 0; - GET_TC(tc)->size = PyList_GET_SIZE((PyObject *)obj); -} - -int List_iterNext(JSOBJ obj, JSONTypeContext *tc) { - if (GET_TC(tc)->index >= GET_TC(tc)->size) { - return 0; - } - - GET_TC(tc)->itemValue = PyList_GET_ITEM(obj, GET_TC(tc)->index); - GET_TC(tc)->index++; - return 1; -} - -void List_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} - -JSOBJ List_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -char *List_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), - size_t *Py_UNUSED(outLen)) { - return NULL; -} - -//============================================================================= -// pandas Index iteration functions -//============================================================================= -void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - GET_TC(tc)->index = 0; - GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); - if (!GET_TC(tc)->cStr) { - PyErr_NoMemory(); - } -} - -int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) { - Py_ssize_t index; - if (!GET_TC(tc)->cStr) { - return 0; - } - - index = GET_TC(tc)->index; - Py_XDECREF(GET_TC(tc)->itemValue); - if (index == 0) { - memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); - } else if (index == 1) { - memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); - GET_TC(tc)->itemValue = get_values(obj); - if (!GET_TC(tc)->itemValue) { - return 0; - } - } else { - return 0; - } - - GET_TC(tc)->index++; - return 1; -} - -void Index_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} - -JSOBJ Index_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - *outLen = strlen(GET_TC(tc)->cStr); - return GET_TC(tc)->cStr; -} - -//============================================================================= -// pandas Series iteration functions -//============================================================================= -void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; - GET_TC(tc)->index = 0; - GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); - enc->outputFormat = VALUES; // for contained series - if (!GET_TC(tc)->cStr) { - PyErr_NoMemory(); - } -} - -int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) { - Py_ssize_t index; - if (!GET_TC(tc)->cStr) { - return 0; - } - - index = GET_TC(tc)->index; - Py_XDECREF(GET_TC(tc)->itemValue); - if (index == 0) { - memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); - } else if (index == 1) { - memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); - } else if (index == 2) { - memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); - GET_TC(tc)->itemValue = get_values(obj); - if (!GET_TC(tc)->itemValue) { - return 0; - } - } else { - return 0; - } - - GET_TC(tc)->index++; - return 1; -} - -void Series_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - 
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; - enc->outputFormat = enc->originalOutputFormat; -} - -JSOBJ Series_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - *outLen = strlen(GET_TC(tc)->cStr); - return GET_TC(tc)->cStr; -} - -//============================================================================= -// pandas DataFrame iteration functions -//============================================================================= -void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; - GET_TC(tc)->index = 0; - GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); - enc->outputFormat = VALUES; // for contained series & index - if (!GET_TC(tc)->cStr) { - PyErr_NoMemory(); - } -} - -int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) { - Py_ssize_t index; - if (!GET_TC(tc)->cStr) { - return 0; - } - - index = GET_TC(tc)->index; - Py_XDECREF(GET_TC(tc)->itemValue); - if (index == 0) { - memcpy(GET_TC(tc)->cStr, "columns", sizeof(char) * 8); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); - } else if (index == 1) { - memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); - } else if (index == 2) { - memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); - if (is_simple_frame(obj)) { - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); - if (!GET_TC(tc)->itemValue) { - return 0; - } - } else { - Py_INCREF(obj); - GET_TC(tc)->itemValue = obj; - } - } else { - return 0; - } - - GET_TC(tc)->index++; - return 1; -} - -void DataFrame_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; - enc->outputFormat = enc->originalOutputFormat; -} - -JSOBJ DataFrame_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - *outLen = strlen(GET_TC(tc)->cStr); - return GET_TC(tc)->cStr; -} - -//============================================================================= -// Dict iteration functions -// itemName might converted to string (Python_Str). Do refCounting -// itemValue is borrowed from object (which is dict). 
No refCounting -//============================================================================= -void Dict_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - GET_TC(tc)->index = 0; -} - -int Dict_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - PyObject *itemNameTmp; - - if (GET_TC(tc)->itemName) { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = NULL; - } - - if (!PyDict_Next((PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index, - &GET_TC(tc)->itemName, &GET_TC(tc)->itemValue)) { - return 0; - } - - if (PyUnicode_Check(GET_TC(tc)->itemName)) { - GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); - } else if (!PyBytes_Check(GET_TC(tc)->itemName)) { - GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName); - itemNameTmp = GET_TC(tc)->itemName; - GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); - Py_DECREF(itemNameTmp); - } else { - Py_INCREF(GET_TC(tc)->itemName); - } - return 1; -} - -void Dict_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - if (GET_TC(tc)->itemName) { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = NULL; - } - Py_DECREF(GET_TC(tc)->dictObj); -} - -JSOBJ Dict_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); - return PyBytes_AS_STRING(GET_TC(tc)->itemName); -} - -void NpyArr_freeLabels(char **labels, npy_intp len) { - npy_intp i; - - if (labels) { - for (i = 0; i < len; i++) { - PyObject_Free(labels[i]); - } - PyObject_Free(labels); - } -} - -/* - * Function: NpyArr_encodeLabels - * ----------------------------- - * - * Builds an array of "encoded" labels. - * - * labels: PyArrayObject pointer for labels to be "encoded" - * num : number of labels - * - * "encode" is quoted above because we aren't really doing encoding - * For historical reasons this function would actually encode the entire - * array into a separate buffer with a separate call to JSON_Encode - * and would leave it to complex pointer manipulation from there to - * unpack values as needed. To make things simpler and more idiomatic - * this has instead just stringified any input save for datetime values, - * which may need to be represented in various formats. - */ -char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, - npy_intp num) { - // NOTE this function steals a reference to labels. 
- PyObject *item = NULL; - size_t len; - npy_intp i, stride; - char **ret; - char *dataptr, *cLabel; - int type_num; - NPY_DATETIMEUNIT base = enc->datetimeUnit; - - if (!labels) { - return 0; - } - - if (PyArray_SIZE(labels) < num) { - PyErr_SetString( - PyExc_ValueError, - "Label array sizes do not match corresponding data shape"); - Py_DECREF(labels); - return 0; - } - - ret = PyObject_Malloc(sizeof(char *) * num); - if (!ret) { - PyErr_NoMemory(); - Py_DECREF(labels); - return 0; - } - - for (i = 0; i < num; i++) { - ret[i] = NULL; - } - - stride = PyArray_STRIDE(labels, 0); - dataptr = PyArray_DATA(labels); - type_num = PyArray_TYPE(labels); - - for (i = 0; i < num; i++) { - item = PyArray_GETITEM(labels, dataptr); - if (!item) { - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - int is_datetimelike = 0; - npy_int64 nanosecVal; - if (PyTypeNum_ISDATETIME(type_num)) { - is_datetimelike = 1; - PyArray_VectorUnaryFunc *castfunc = - PyArray_GetCastFunc(PyArray_DescrFromType(type_num), NPY_INT64); - if (!castfunc) { - PyErr_Format(PyExc_ValueError, - "Cannot cast numpy dtype %d to long", - enc->npyType); - } - castfunc(dataptr, &nanosecVal, 1, NULL, NULL); - } else if (PyDate_Check(item) || PyDelta_Check(item)) { - is_datetimelike = 1; - if (PyObject_HasAttrString(item, "_value")) { - // see test_date_index_and_values for case with non-nano - nanosecVal = get_long_attr(item, "_value"); - } else { - if (PyDelta_Check(item)) { - nanosecVal = total_seconds(item) * - 1000000000LL; // nanoseconds per second - } else { - // datetime.* objects don't follow above rules - nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns); - } - } - } - - if (is_datetimelike) { - if (nanosecVal == get_nat()) { - len = 4; - cLabel = PyObject_Malloc(len + 1); - strncpy(cLabel, "null", len + 1); - } else { - if (enc->datetimeIso) { - if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { - cLabel = int64ToIsoDuration(nanosecVal, &len); - } else { - if (type_num == NPY_DATETIME) { - cLabel = int64ToIso(nanosecVal, base, &len); - } else { - cLabel = PyDateTimeToIso(item, base, &len); - } - } - if (cLabel == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - } else { - int size_of_cLabel = 21; // 21 chars for int 64 - cLabel = PyObject_Malloc(size_of_cLabel); - snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT, - NpyDateTimeToEpoch(nanosecVal, base)); - len = strlen(cLabel); - } - } - } else { // Fallback to string representation - // Replace item with the string to keep it alive. 
- Py_SETREF(item, PyObject_Str(item)); - if (item == NULL) { - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - cLabel = (char *)PyUnicode_AsUTF8(item); - len = strlen(cLabel); - } - - // Add 1 to include NULL terminator - ret[i] = PyObject_Malloc(len + 1); - memcpy(ret[i], cLabel, len + 1); - Py_DECREF(item); - - if (is_datetimelike) { - PyObject_Free(cLabel); - } - - if (PyErr_Occurred()) { - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - if (!ret[i]) { - PyErr_NoMemory(); - ret = 0; - break; - } - - dataptr += stride; - } - - Py_DECREF(labels); - return ret; -} - -void Object_invokeDefaultHandler(PyObject *obj, PyObjectEncoder *enc) { - PyObject *tmpObj = NULL; - tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL); - if (!PyErr_Occurred()) { - if (tmpObj == NULL) { - PyErr_SetString(PyExc_TypeError, - "Failed to execute default handler"); - } else { - encode(tmpObj, (JSONObjectEncoder *)enc, NULL, 0); - } - } - Py_XDECREF(tmpObj); - return; -} - -void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { - PyObject *obj, *exc, *toDictFunc, *tmpObj, *values; - TypeContext *pc; - PyObjectEncoder *enc; - double val; - npy_int64 value; - int unit; - - tc->prv = NULL; - - if (!_obj) { - tc->type = JT_INVALID; - return; - } - - obj = (PyObject *)_obj; - enc = (PyObjectEncoder *)tc->encoder; - - if (PyBool_Check(obj)) { - tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE; - return; - } else if (obj == Py_None) { - tc->type = JT_NULL; - return; - } - - pc = createTypeContext(); - if (!pc) { - tc->type = JT_INVALID; - return; - } - tc->prv = pc; - - if (PyTypeNum_ISDATETIME(enc->npyType)) { - int64_t longVal; - PyArray_VectorUnaryFunc *castfunc = - PyArray_GetCastFunc(PyArray_DescrFromType(enc->npyType), NPY_INT64); - if (!castfunc) { - PyErr_Format(PyExc_ValueError, "Cannot cast numpy dtype %d to long", - enc->npyType); - } - castfunc(enc->npyValue, &longVal, 1, NULL, NULL); - if (longVal == get_nat()) { - tc->type = JT_NULL; - } else { - if (enc->datetimeIso) { - if (enc->npyType == NPY_TIMEDELTA) { - pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; - } else { - pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; - } - // Currently no way to pass longVal to iso function, so use - // state management - GET_TC(tc)->longValue = longVal; - tc->type = JT_UTF8; - } else { - NPY_DATETIMEUNIT base = - ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base); - tc->type = JT_LONG; - } - } - - // TODO(username): this prevents infinite loop with - // mixed-type DataFrames; - // refactor - enc->npyCtxtPassthru = NULL; - enc->npyType = -1; - return; - } - - if (PyIter_Check(obj) || - (PyArray_Check(obj) && !PyArray_CheckScalar(obj))) { - goto ISITERABLE; - } - - if (PyLong_Check(obj)) { - tc->type = JT_LONG; - int overflow = 0; - GET_TC(tc)->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow); - int err; - err = (GET_TC(tc)->longValue == -1) && PyErr_Occurred(); - - if (overflow) { - tc->type = JT_BIGNUM; - } else if (err) { - goto INVALID; - } - - return; - } else if (PyFloat_Check(obj)) { - val = PyFloat_AS_DOUBLE(obj); - if (npy_isnan(val) || npy_isinf(val)) { - tc->type = JT_NULL; - } else { - GET_TC(tc)->doubleValue = val; - tc->type = JT_DOUBLE; - } - return; - } else if (PyBytes_Check(obj)) { - pc->PyTypeToUTF8 = PyBytesToUTF8; - tc->type = JT_UTF8; - return; - } else if (PyUnicode_Check(obj)) { - pc->PyTypeToUTF8 = PyUnicodeToUTF8; - tc->type = JT_UTF8; - return; - } else if (object_is_decimal_type(obj)) { - 
GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj); - tc->type = JT_DOUBLE; - return; - } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) { - if (object_is_nat_type(obj)) { - tc->type = JT_NULL; - return; - } - - if (enc->datetimeIso) { - pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; - tc->type = JT_UTF8; - } else { - NPY_DATETIMEUNIT base = - ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); - tc->type = JT_LONG; - } - return; - } else if (PyTime_Check(obj)) { - pc->PyTypeToUTF8 = PyTimeToJSON; - tc->type = JT_UTF8; - return; - } else if (PyArray_IsScalar(obj, Datetime)) { - if (((PyDatetimeScalarObject *)obj)->obval == get_nat()) { - tc->type = JT_NULL; - return; - } - - if (enc->datetimeIso) { - pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; - tc->type = JT_UTF8; - } else { - NPY_DATETIMEUNIT base = - ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); - tc->type = JT_LONG; - } - return; - } else if (PyDelta_Check(obj)) { - if (PyObject_HasAttrString(obj, "_value")) { - value = get_long_attr(obj, "_value"); - } else { - value = total_seconds(obj) * 1000000000LL; // nanoseconds per sec - } - - if (value == get_nat()) { - tc->type = JT_NULL; - return; - } else if (enc->datetimeIso) { - pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; - tc->type = JT_UTF8; - } else { - unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - if (scaleNanosecToUnit(&value, unit) != 0) { - // TODO(username): Add some kind of error handling here - } - - exc = PyErr_Occurred(); - - if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { - goto INVALID; - } - - tc->type = JT_LONG; - } - GET_TC(tc)->longValue = value; - return; - } else if (PyArray_IsScalar(obj, Integer)) { - tc->type = JT_LONG; - PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), - PyArray_DescrFromType(NPY_INT64)); - - exc = PyErr_Occurred(); - - if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { - goto INVALID; - } - - return; - } else if (PyArray_IsScalar(obj, Bool)) { - PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), - PyArray_DescrFromType(NPY_BOOL)); - tc->type = (GET_TC(tc)->longValue) ? 
JT_TRUE : JT_FALSE; - return; - } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) { - PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->doubleValue), - PyArray_DescrFromType(NPY_DOUBLE)); - tc->type = JT_DOUBLE; - return; - } else if (PyArray_Check(obj) && PyArray_CheckScalar(obj)) { - PyErr_Format(PyExc_TypeError, - "%R (0d array) is not JSON serializable at the moment", - obj); - goto INVALID; - } else if (object_is_na_type(obj)) { - tc->type = JT_NULL; - return; - } - -ISITERABLE: - - if (object_is_index_type(obj)) { - if (enc->outputFormat == SPLIT) { - tc->type = JT_OBJECT; - pc->iterBegin = Index_iterBegin; - pc->iterEnd = Index_iterEnd; - pc->iterNext = Index_iterNext; - pc->iterGetValue = Index_iterGetValue; - pc->iterGetName = Index_iterGetName; - return; - } - - pc->newObj = get_values(obj); - if (pc->newObj) { - tc->type = JT_ARRAY; - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - } else { - goto INVALID; - } - - return; - } else if (object_is_series_type(obj)) { - if (enc->outputFormat == SPLIT) { - tc->type = JT_OBJECT; - pc->iterBegin = Series_iterBegin; - pc->iterEnd = Series_iterEnd; - pc->iterNext = Series_iterNext; - pc->iterGetValue = Series_iterGetValue; - pc->iterGetName = Series_iterGetName; - return; - } - - pc->newObj = get_values(obj); - if (!pc->newObj) { - goto INVALID; - } - - if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { - tc->type = JT_OBJECT; - tmpObj = PyObject_GetAttrString(obj, "index"); - if (!tmpObj) { - goto INVALID; - } - values = get_values(tmpObj); - Py_DECREF(tmpObj); - if (!values) { - goto INVALID; - } - pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, - pc->columnLabelsLen); - if (!pc->columnLabels) { - goto INVALID; - } - } else { - tc->type = JT_ARRAY; - } - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - return; - } else if (PyArray_Check(obj)) { - if (enc->npyCtxtPassthru) { - pc->npyarr = enc->npyCtxtPassthru; - tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY); - - pc->iterBegin = NpyArrPassThru_iterBegin; - pc->iterNext = NpyArr_iterNext; - pc->iterEnd = NpyArrPassThru_iterEnd; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - - enc->npyCtxtPassthru = NULL; - return; - } - - tc->type = JT_ARRAY; - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - return; - } else if (object_is_dataframe_type(obj)) { - if (enc->blkCtxtPassthru) { - pc->pdblock = enc->blkCtxtPassthru; - tc->type = - (pc->pdblock->npyCtxts[0]->columnLabels ? 
JT_OBJECT : JT_ARRAY); - - pc->iterBegin = PdBlockPassThru_iterBegin; - pc->iterEnd = PdBlockPassThru_iterEnd; - pc->iterNext = PdBlock_iterNextItem; - pc->iterGetName = PdBlock_iterGetName; - pc->iterGetValue = NpyArr_iterGetValue; - - enc->blkCtxtPassthru = NULL; - return; - } - - if (enc->outputFormat == SPLIT) { - tc->type = JT_OBJECT; - pc->iterBegin = DataFrame_iterBegin; - pc->iterEnd = DataFrame_iterEnd; - pc->iterNext = DataFrame_iterNext; - pc->iterGetValue = DataFrame_iterGetValue; - pc->iterGetName = DataFrame_iterGetName; - return; - } - - if (is_simple_frame(obj)) { - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetName = NpyArr_iterGetName; - - pc->newObj = PyObject_GetAttrString(obj, "values"); - if (!pc->newObj) { - goto INVALID; - } - } else { - pc->iterBegin = PdBlock_iterBegin; - pc->iterEnd = PdBlock_iterEnd; - pc->iterNext = PdBlock_iterNext; - pc->iterGetName = PdBlock_iterGetName; - } - pc->iterGetValue = NpyArr_iterGetValue; - - if (enc->outputFormat == VALUES) { - tc->type = JT_ARRAY; - } else if (enc->outputFormat == RECORDS) { - tc->type = JT_ARRAY; - tmpObj = PyObject_GetAttrString(obj, "columns"); - if (!tmpObj) { - goto INVALID; - } - values = get_values(tmpObj); - if (!values) { - Py_DECREF(tmpObj); - goto INVALID; - } - pc->columnLabelsLen = PyObject_Size(tmpObj); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, - pc->columnLabelsLen); - Py_DECREF(tmpObj); - if (!pc->columnLabels) { - goto INVALID; - } - } else if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { - tc->type = JT_OBJECT; - tmpObj = (enc->outputFormat == INDEX - ? PyObject_GetAttrString(obj, "index") - : PyObject_GetAttrString(obj, "columns")); - if (!tmpObj) { - goto INVALID; - } - values = get_values(tmpObj); - if (!values) { - Py_DECREF(tmpObj); - goto INVALID; - } - pc->rowLabelsLen = PyObject_Size(tmpObj); - pc->rowLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, - pc->rowLabelsLen); - Py_DECREF(tmpObj); - tmpObj = (enc->outputFormat == INDEX - ? 
PyObject_GetAttrString(obj, "columns") - : PyObject_GetAttrString(obj, "index")); - if (!tmpObj) { - NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); - pc->rowLabels = NULL; - goto INVALID; - } - values = get_values(tmpObj); - if (!values) { - Py_DECREF(tmpObj); - NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); - pc->rowLabels = NULL; - goto INVALID; - } - pc->columnLabelsLen = PyObject_Size(tmpObj); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, - pc->columnLabelsLen); - Py_DECREF(tmpObj); - if (!pc->columnLabels) { - NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); - pc->rowLabels = NULL; - goto INVALID; - } - - if (enc->outputFormat == COLUMNS) { - pc->transpose = 1; - } - } else { - goto INVALID; - } - return; - } else if (PyDict_Check(obj)) { - tc->type = JT_OBJECT; - pc->iterBegin = Dict_iterBegin; - pc->iterEnd = Dict_iterEnd; - pc->iterNext = Dict_iterNext; - pc->iterGetValue = Dict_iterGetValue; - pc->iterGetName = Dict_iterGetName; - pc->dictObj = obj; - Py_INCREF(obj); - - return; - } else if (PyList_Check(obj)) { - tc->type = JT_ARRAY; - pc->iterBegin = List_iterBegin; - pc->iterEnd = List_iterEnd; - pc->iterNext = List_iterNext; - pc->iterGetValue = List_iterGetValue; - pc->iterGetName = List_iterGetName; - return; - } else if (PyTuple_Check(obj)) { - tc->type = JT_ARRAY; - pc->iterBegin = Tuple_iterBegin; - pc->iterEnd = Tuple_iterEnd; - pc->iterNext = Tuple_iterNext; - pc->iterGetValue = Tuple_iterGetValue; - pc->iterGetName = Tuple_iterGetName; - return; - } else if (PyAnySet_Check(obj)) { - tc->type = JT_ARRAY; - pc->iterBegin = Set_iterBegin; - pc->iterEnd = Set_iterEnd; - pc->iterNext = Set_iterNext; - pc->iterGetValue = Set_iterGetValue; - pc->iterGetName = Set_iterGetName; - return; - } - - toDictFunc = PyObject_GetAttrString(obj, "toDict"); - - if (toDictFunc) { - PyObject *tuple = PyTuple_New(0); - PyObject *toDictResult = PyObject_Call(toDictFunc, tuple, NULL); - Py_DECREF(tuple); - Py_DECREF(toDictFunc); - - if (toDictResult == NULL) { - PyErr_Clear(); - tc->type = JT_NULL; - return; - } - - if (!PyDict_Check(toDictResult)) { - Py_DECREF(toDictResult); - tc->type = JT_NULL; - return; - } - - tc->type = JT_OBJECT; - pc->iterBegin = Dict_iterBegin; - pc->iterEnd = Dict_iterEnd; - pc->iterNext = Dict_iterNext; - pc->iterGetValue = Dict_iterGetValue; - pc->iterGetName = Dict_iterGetName; - pc->dictObj = toDictResult; - return; - } - - PyErr_Clear(); - - if (enc->defaultHandler) { - Object_invokeDefaultHandler(obj, enc); - goto INVALID; - } - - tc->type = JT_OBJECT; - pc->iterBegin = Dir_iterBegin; - pc->iterEnd = Dir_iterEnd; - pc->iterNext = Dir_iterNext; - pc->iterGetValue = Dir_iterGetValue; - pc->iterGetName = Dir_iterGetName; - return; - -INVALID: - tc->type = JT_INVALID; - PyObject_Free(tc->prv); - tc->prv = NULL; - return; -} - -void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - if (tc->prv) { - Py_XDECREF(GET_TC(tc)->newObj); - GET_TC(tc)->newObj = NULL; - NpyArr_freeLabels(GET_TC(tc)->rowLabels, GET_TC(tc)->rowLabelsLen); - GET_TC(tc)->rowLabels = NULL; - NpyArr_freeLabels(GET_TC(tc)->columnLabels, - GET_TC(tc)->columnLabelsLen); - GET_TC(tc)->columnLabels = NULL; - PyObject_Free(GET_TC(tc)->cStr); - GET_TC(tc)->cStr = NULL; - PyObject_Free(tc->prv); - tc->prv = NULL; - } -} - -const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc, - size_t *_outLen) { - return GET_TC(tc)->PyTypeToUTF8(obj, tc, _outLen); -} - -JSINT64 Object_getLongValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return 
GET_TC(tc)->longValue; -} - -double Object_getDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->doubleValue; -} - -const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc, - size_t *_outLen) { - PyObject *repr = PyObject_Str(obj); - const char *str = PyUnicode_AsUTF8AndSize(repr, (Py_ssize_t *)_outLen); - char *bytes = PyObject_Malloc(*_outLen + 1); - memcpy(bytes, str, *_outLen + 1); - GET_TC(tc)->cStr = bytes; - - Py_DECREF(repr); - - return GET_TC(tc)->cStr; -} - -static void Object_releaseObject(JSOBJ _obj) { Py_DECREF((PyObject *)_obj); } - -void Object_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->iterBegin(obj, tc); -} - -int Object_iterNext(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->iterNext(obj, tc); -} - -void Object_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->iterEnd(obj, tc); -} - -JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->iterGetValue(obj, tc); -} - -char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { - return GET_TC(tc)->iterGetName(obj, tc, outLen); -} - -PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, - PyObject *kwargs) { - PyDateTime_IMPORT; - if (PyDateTimeAPI == NULL) { - return NULL; - } - - PandasDateTime_IMPORT; - if (PandasDateTimeAPI == NULL) { - return NULL; - } - - static char *kwlist[] = {"obj", - "ensure_ascii", - "double_precision", - "encode_html_chars", - "orient", - "date_unit", - "iso_dates", - "default_handler", - "indent", - NULL}; - - char buffer[65536]; - char *ret; - PyObject *newobj; - PyObject *oinput = NULL; - PyObject *oensureAscii = NULL; - int idoublePrecision = 10; // default double precision setting - PyObject *oencodeHTMLChars = NULL; - char *sOrient = NULL; - char *sdateFormat = NULL; - PyObject *oisoDates = 0; - PyObject *odefHandler = 0; - int indent = 0; - - PyObjectEncoder pyEncoder = {{ - Object_beginTypeContext, - Object_endTypeContext, - Object_getStringValue, - Object_getLongValue, - NULL, // getIntValue is unused - Object_getDoubleValue, - Object_getBigNumStringValue, - Object_iterBegin, - Object_iterNext, - Object_iterEnd, - Object_iterGetValue, - Object_iterGetName, - Object_releaseObject, - PyObject_Malloc, - PyObject_Realloc, - PyObject_Free, - -1, // recursionMax - idoublePrecision, - 1, // forceAscii - 0, // encodeHTMLChars - 0, // indent - }}; - JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder; - - pyEncoder.npyCtxtPassthru = NULL; - pyEncoder.blkCtxtPassthru = NULL; - pyEncoder.npyType = -1; - pyEncoder.npyValue = NULL; - pyEncoder.datetimeIso = 0; - pyEncoder.datetimeUnit = NPY_FR_ms; - pyEncoder.outputFormat = COLUMNS; - pyEncoder.defaultHandler = 0; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist, - &oinput, &oensureAscii, &idoublePrecision, - &oencodeHTMLChars, &sOrient, &sdateFormat, - &oisoDates, &odefHandler, &indent)) { - return NULL; - } - - if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) { - encoder->forceASCII = 0; - } - - if (oencodeHTMLChars != NULL && PyObject_IsTrue(oencodeHTMLChars)) { - encoder->encodeHTMLChars = 1; - } - - if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0) { - PyErr_Format( - PyExc_ValueError, - "Invalid value '%d' for option 'double_precision', max is '%u'", - idoublePrecision, JSON_DOUBLE_MAX_DECIMALS); - return NULL; - } - encoder->doublePrecision = idoublePrecision; - - if (sOrient != NULL) { - if (strcmp(sOrient, "records") == 0) { - pyEncoder.outputFormat = 
RECORDS; - } else if (strcmp(sOrient, "index") == 0) { - pyEncoder.outputFormat = INDEX; - } else if (strcmp(sOrient, "split") == 0) { - pyEncoder.outputFormat = SPLIT; - } else if (strcmp(sOrient, "values") == 0) { - pyEncoder.outputFormat = VALUES; - } else if (strcmp(sOrient, "columns") != 0) { - PyErr_Format(PyExc_ValueError, - "Invalid value '%s' for option 'orient'", sOrient); - return NULL; - } - } - - if (sdateFormat != NULL) { - if (strcmp(sdateFormat, "s") == 0) { - pyEncoder.datetimeUnit = NPY_FR_s; - } else if (strcmp(sdateFormat, "ms") == 0) { - pyEncoder.datetimeUnit = NPY_FR_ms; - } else if (strcmp(sdateFormat, "us") == 0) { - pyEncoder.datetimeUnit = NPY_FR_us; - } else if (strcmp(sdateFormat, "ns") == 0) { - pyEncoder.datetimeUnit = NPY_FR_ns; - } else { - PyErr_Format(PyExc_ValueError, - "Invalid value '%s' for option 'date_unit'", - sdateFormat); - return NULL; - } - } - - if (oisoDates != NULL && PyObject_IsTrue(oisoDates)) { - pyEncoder.datetimeIso = 1; - } - - if (odefHandler != NULL && odefHandler != Py_None) { - if (!PyCallable_Check(odefHandler)) { - PyErr_SetString(PyExc_TypeError, "Default handler is not callable"); - return NULL; - } - pyEncoder.defaultHandler = odefHandler; - } - - encoder->indent = indent; - - pyEncoder.originalOutputFormat = pyEncoder.outputFormat; - ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer)); - if (PyErr_Occurred()) { - return NULL; - } - - if (encoder->errorMsg) { - if (ret != buffer) { - encoder->free(ret); - } - PyErr_Format(PyExc_OverflowError, "%s", encoder->errorMsg); - return NULL; - } - - newobj = PyUnicode_FromString(ret); - - if (ret != buffer) { - encoder->free(ret); - } - - return newobj; -} diff --git a/pandas/_libs/src/ujson/python/ujson.c b/pandas/_libs/src/ujson/python/ujson.c deleted file mode 100644 index 5c87ee6dd7ddc..0000000000000 --- a/pandas/_libs/src/ujson/python/ujson.c +++ /dev/null @@ -1,451 +0,0 @@ -/* -Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -* Neither the name of the ESN Social Software AB nor the -names of its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - -Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) -https://github.com/client9/stringencoders -Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. - -Numeric decoder derived from TCL library -https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms -* Copyright (c) 1988-1993 The Regents of the University of California. -* Copyright (c) 1994 Sun Microsystems, Inc. -*/ - -#include "version.h" -#define PY_SSIZE_T_CLEAN -#include -#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY -#include "numpy/arrayobject.h" - -/* objToJSON */ -PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs); -void *initObjToJSON(void); - -/* JSONToObj */ -PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs); - -#define ENCODER_HELP_TEXT \ - "Use ensure_ascii=false to output UTF-8. Pass in double_precision to " \ - "alter the maximum digit precision of doubles. Set " \ - "encode_html_chars=True to encode < > & as unicode escape sequences." - -static PyMethodDef ujsonMethods[] = { - {"encode", (PyCFunction)objToJSON, METH_VARARGS | METH_KEYWORDS, - "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT}, - {"decode", (PyCFunction)JSONToObj, METH_VARARGS | METH_KEYWORDS, - "Converts JSON as string to dict object structure. Use precise_float=True " - "to use high precision float decoder."}, - {"dumps", (PyCFunction)objToJSON, METH_VARARGS | METH_KEYWORDS, - "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT}, - {"loads", (PyCFunction)JSONToObj, METH_VARARGS | METH_KEYWORDS, - "Converts JSON as string to dict object structure. Use precise_float=True " - "to use high precision float decoder."}, - {NULL, NULL, 0, NULL} /* Sentinel */ -}; - -typedef struct { - PyObject *type_decimal; - PyObject *type_dataframe; - PyObject *type_series; - PyObject *type_index; - PyObject *type_nat; - PyObject *type_na; -} modulestate; - -#define modulestate(o) ((modulestate *)PyModule_GetState(o)) - -static int module_traverse(PyObject *m, visitproc visit, void *arg); -static int module_clear(PyObject *m); -static void module_free(void *module); - -static struct PyModuleDef moduledef = {.m_base = PyModuleDef_HEAD_INIT, - .m_name = "pandas._libs.json", - .m_methods = ujsonMethods, - .m_size = sizeof(modulestate), - .m_traverse = module_traverse, - .m_clear = module_clear, - .m_free = module_free}; - -#ifndef PYPY_VERSION -/* Used in objToJSON.c */ -int object_is_decimal_type(PyObject *obj) { - PyObject *module = PyState_FindModule(&moduledef); - if (module == NULL) - return 0; - modulestate *state = modulestate(module); - if (state == NULL) - return 0; - PyObject *type_decimal = state->type_decimal; - if (type_decimal == NULL) { - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_decimal); - if (result == -1) { - PyErr_Clear(); - return 0; - } - return result; -} - -int object_is_dataframe_type(PyObject *obj) { - PyObject *module = PyState_FindModule(&moduledef); - if (module == NULL) - return 0; - modulestate *state = modulestate(module); - if (state == NULL) - return 0; - PyObject *type_dataframe = state->type_dataframe; - if (type_dataframe == NULL) { - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_dataframe); - if (result == -1) { - PyErr_Clear(); - return 0; - } - return result; -} - -int object_is_series_type(PyObject *obj) { - PyObject *module = PyState_FindModule(&moduledef); - if (module == NULL) - return 0; - modulestate *state = 
modulestate(module); - if (state == NULL) - return 0; - PyObject *type_series = state->type_series; - if (type_series == NULL) { - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_series); - if (result == -1) { - PyErr_Clear(); - return 0; - } - return result; -} - -int object_is_index_type(PyObject *obj) { - PyObject *module = PyState_FindModule(&moduledef); - if (module == NULL) - return 0; - modulestate *state = modulestate(module); - if (state == NULL) - return 0; - PyObject *type_index = state->type_index; - if (type_index == NULL) { - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_index); - if (result == -1) { - PyErr_Clear(); - return 0; - } - return result; -} - -int object_is_nat_type(PyObject *obj) { - PyObject *module = PyState_FindModule(&moduledef); - if (module == NULL) - return 0; - modulestate *state = modulestate(module); - if (state == NULL) - return 0; - PyObject *type_nat = state->type_nat; - if (type_nat == NULL) { - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_nat); - if (result == -1) { - PyErr_Clear(); - return 0; - } - return result; -} - -int object_is_na_type(PyObject *obj) { - PyObject *module = PyState_FindModule(&moduledef); - if (module == NULL) - return 0; - modulestate *state = modulestate(module); - if (state == NULL) - return 0; - PyObject *type_na = state->type_na; - if (type_na == NULL) { - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_na); - if (result == -1) { - PyErr_Clear(); - return 0; - } - return result; -} -#else - /* Used in objToJSON.c */ -int object_is_decimal_type(PyObject *obj) { - PyObject *module = PyImport_ImportModule("decimal"); - if (module == NULL) { - PyErr_Clear(); - return 0; - } - PyObject *type_decimal = PyObject_GetAttrString(module, "Decimal"); - if (type_decimal == NULL) { - Py_DECREF(module); - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_decimal); - if (result == -1) { - Py_DECREF(module); - Py_DECREF(type_decimal); - PyErr_Clear(); - return 0; - } - return result; -} - -int object_is_dataframe_type(PyObject *obj) { - PyObject *module = PyImport_ImportModule("pandas"); - if (module == NULL) { - PyErr_Clear(); - return 0; - } - PyObject *type_dataframe = PyObject_GetAttrString(module, "DataFrame"); - if (type_dataframe == NULL) { - Py_DECREF(module); - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_dataframe); - if (result == -1) { - Py_DECREF(module); - Py_DECREF(type_dataframe); - PyErr_Clear(); - return 0; - } - return result; -} - -int object_is_series_type(PyObject *obj) { - PyObject *module = PyImport_ImportModule("pandas"); - if (module == NULL) { - PyErr_Clear(); - return 0; - } - PyObject *type_series = PyObject_GetAttrString(module, "Series"); - if (type_series == NULL) { - Py_DECREF(module); - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_series); - if (result == -1) { - Py_DECREF(module); - Py_DECREF(type_series); - PyErr_Clear(); - return 0; - } - return result; -} - -int object_is_index_type(PyObject *obj) { - PyObject *module = PyImport_ImportModule("pandas"); - if (module == NULL) { - PyErr_Clear(); - return 0; - } - PyObject *type_index = PyObject_GetAttrString(module, "Index"); - if (type_index == NULL) { - Py_DECREF(module); - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_index); - if (result == -1) { - Py_DECREF(module); - Py_DECREF(type_index); - PyErr_Clear(); - return 
0; - } - return result; -} - -int object_is_nat_type(PyObject *obj) { - PyObject *module = PyImport_ImportModule("pandas._libs.tslibs.nattype"); - if (module == NULL) { - PyErr_Clear(); - return 0; - } - PyObject *type_nat = PyObject_GetAttrString(module, "NaTType"); - if (type_nat == NULL) { - Py_DECREF(module); - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_nat); - if (result == -1) { - Py_DECREF(module); - Py_DECREF(type_nat); - PyErr_Clear(); - return 0; - } - return result; -} - -int object_is_na_type(PyObject *obj) { - PyObject *module = PyImport_ImportModule("pandas._libs.missing"); - if (module == NULL) { - PyErr_Clear(); - return 0; - } - PyObject *type_na = PyObject_GetAttrString(module, "NAType"); - if (type_na == NULL) { - Py_DECREF(module); - PyErr_Clear(); - return 0; - } - int result = PyObject_IsInstance(obj, type_na); - if (result == -1) { - Py_DECREF(module); - Py_DECREF(type_na); - PyErr_Clear(); - return 0; - } - return result; -} - -#endif - -static int module_traverse(PyObject *m, visitproc visit, void *arg) { - Py_VISIT(modulestate(m)->type_decimal); - Py_VISIT(modulestate(m)->type_dataframe); - Py_VISIT(modulestate(m)->type_series); - Py_VISIT(modulestate(m)->type_index); - Py_VISIT(modulestate(m)->type_nat); - Py_VISIT(modulestate(m)->type_na); - return 0; -} - -static int module_clear(PyObject *m) { - Py_CLEAR(modulestate(m)->type_decimal); - Py_CLEAR(modulestate(m)->type_dataframe); - Py_CLEAR(modulestate(m)->type_series); - Py_CLEAR(modulestate(m)->type_index); - Py_CLEAR(modulestate(m)->type_nat); - Py_CLEAR(modulestate(m)->type_na); - return 0; -} - -static void module_free(void *module) { module_clear((PyObject *)module); } - -PyMODINIT_FUNC PyInit_json(void) { - import_array() - PyObject *module; - -#ifndef PYPY_VERSION - // This function is not supported in PyPy. 
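  // PyState_FindModule() returns a module previously created from this
  // moduledef, if any; reusing it keeps the pandas/decimal types cached in
  // the module state below instead of re-importing them on every init.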
- if ((module = PyState_FindModule(&moduledef)) != NULL) { - Py_INCREF(module); - return module; - } -#endif - - module = PyModule_Create(&moduledef); - if (module == NULL) { - return NULL; - } - -#ifndef PYPY_VERSION - PyObject *mod_decimal = PyImport_ImportModule("decimal"); - if (mod_decimal) { - PyObject *type_decimal = PyObject_GetAttrString(mod_decimal, "Decimal"); - assert(type_decimal != NULL); - modulestate(module)->type_decimal = type_decimal; - Py_DECREF(mod_decimal); - } - - PyObject *mod_pandas = PyImport_ImportModule("pandas"); - if (mod_pandas) { - PyObject *type_dataframe = - PyObject_GetAttrString(mod_pandas, "DataFrame"); - assert(type_dataframe != NULL); - modulestate(module)->type_dataframe = type_dataframe; - - PyObject *type_series = PyObject_GetAttrString(mod_pandas, "Series"); - assert(type_series != NULL); - modulestate(module)->type_series = type_series; - - PyObject *type_index = PyObject_GetAttrString(mod_pandas, "Index"); - assert(type_index != NULL); - modulestate(module)->type_index = type_index; - - Py_DECREF(mod_pandas); - } - - PyObject *mod_nattype = - PyImport_ImportModule("pandas._libs.tslibs.nattype"); - if (mod_nattype) { - PyObject *type_nat = PyObject_GetAttrString(mod_nattype, "NaTType"); - assert(type_nat != NULL); - modulestate(module)->type_nat = type_nat; - - Py_DECREF(mod_nattype); - } - - PyObject *mod_natype = PyImport_ImportModule("pandas._libs.missing"); - if (mod_natype) { - PyObject *type_na = PyObject_GetAttrString(mod_natype, "NAType"); - assert(type_na != NULL); - modulestate(module)->type_na = type_na; - - Py_DECREF(mod_natype); - } else { - PyErr_Clear(); - } -#endif - - /* Not vendored for now - JSONDecodeError = PyErr_NewException("ujson.JSONDecodeError", - PyExc_ValueError, NULL); Py_XINCREF(JSONDecodeError); if - (PyModule_AddObject(module, "JSONDecodeError", JSONDecodeError) < 0) - { - Py_XDECREF(JSONDecodeError); - Py_CLEAR(JSONDecodeError); - Py_DECREF(module); - return NULL; - } - */ - - return module; -} diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 6d2d625638231..e3a4a9b88d0cb 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -55,7 +55,7 @@ cdef extern from "numpy/ndarraytypes.h": int64_t NPY_DATETIME_NAT # elswhere we call this NPY_NAT -cdef extern from "src/datetime/pd_datetime.h": +cdef extern from "pandas/datetime/pd_datetime.h": ctypedef struct pandas_timedeltastruct: int64_t days int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index e24a70c3d2b69..dfc78b9f12fe0 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -36,7 +36,7 @@ from numpy cimport ( from pandas._libs.tslibs.util cimport get_c_string_buf_and_size -cdef extern from "src/datetime/pd_datetime.h": +cdef extern from "pandas/datetime/pd_datetime.h": int cmp_npy_datetimestruct(npy_datetimestruct *a, npy_datetimestruct *b) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 71550824525eb..536ae7ee4673b 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -83,10 +83,10 @@ from pandas._libs.tslibs.util cimport ( ) -cdef extern from "../src/headers/portable.h": +cdef extern from "pandas/portable.h": int getdigit_ascii(char c, int default) nogil -cdef extern from "../src/parser/tokenizer.h": +cdef extern from "pandas/parser/tokenizer.h": double xstrtod(const char 
*p, char **q, char decimal, char sci, char tsep, int skip_trailing, int *error, int *maybe_int) diff --git a/pandas/_libs/tslibs/src/datetime/date_conversions.c b/pandas/_libs/tslibs/src/datetime/date_conversions.c deleted file mode 100644 index 190713d62d306..0000000000000 --- a/pandas/_libs/tslibs/src/datetime/date_conversions.c +++ /dev/null @@ -1,100 +0,0 @@ -/* -Copyright (c) 2020, PyData Development Team -All rights reserved. -Distributed under the terms of the BSD Simplified License. -The full license is in the LICENSE file, distributed with this software. -*/ - -// Conversion routines that are useful for serialization, -// but which don't interact with JSON objects directly - -#include "date_conversions.h" -#include "np_datetime.h" -#include "np_datetime_strings.h" - -/* - * Function: scaleNanosecToUnit - * ----------------------------- - * - * Scales an integer value representing time in nanoseconds to provided unit. - * - * Mutates the provided value directly. Returns 0 on success, non-zero on error. - */ -int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { - switch (unit) { - case NPY_FR_ns: - break; - case NPY_FR_us: - *value /= 1000LL; - break; - case NPY_FR_ms: - *value /= 1000000LL; - break; - case NPY_FR_s: - *value /= 1000000000LL; - break; - default: - return -1; - } - - return 0; -} - -/* Converts the int64_t representation of a datetime to ISO; mutates len */ -char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { - npy_datetimestruct dts; - int ret_code; - - pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); - - *len = (size_t)get_datetime_iso_8601_strlen(0, base); - char *result = PyObject_Malloc(*len); - - if (result == NULL) { - PyErr_NoMemory(); - return NULL; - } - // datetime64 is always naive - ret_code = make_iso_8601_datetime(&dts, result, *len, 0, base); - if (ret_code != 0) { - PyErr_SetString(PyExc_ValueError, - "Could not convert datetime value to string"); - PyObject_Free(result); - } - - // Note that get_datetime_iso_8601_strlen just gives a generic size - // for ISO string conversion, not the actual size used - *len = strlen(result); - return result; -} - -npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { - scaleNanosecToUnit(&dt, base); - return dt; -} - -/* Converts the int64_t representation of a duration to ISO; mutates len */ -char *int64ToIsoDuration(int64_t value, size_t *len) { - pandas_timedeltastruct tds; - int ret_code; - - pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); - - // Max theoretical length of ISO Duration with 64 bit day - // as the largest unit is 70 characters + 1 for a null terminator - char *result = PyObject_Malloc(71); - if (result == NULL) { - PyErr_NoMemory(); - return NULL; - } - - ret_code = make_iso_8601_timedelta(&tds, result, len); - if (ret_code == -1) { - PyErr_SetString(PyExc_ValueError, - "Could not convert timedelta value to string"); - PyObject_Free(result); - return NULL; - } - - return result; -} diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c deleted file mode 100644 index e4d9c5dcd63ea..0000000000000 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ /dev/null @@ -1,947 +0,0 @@ -/* - -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Copyright (c) 2005-2011, NumPy Developers -All rights reserved. 
- -This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt - -*/ - -#define NO_IMPORT - -#ifndef NPY_NO_DEPRECATED_API -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#endif // NPY_NO_DEPRECATED_API - -#include - -#include -#include -#include -#include "np_datetime.h" - - -const int days_per_month_table[2][12] = { - {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, - {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; - -/* - * Returns 1 if the given year is a leap year, 0 otherwise. - */ -int is_leapyear(npy_int64 year) { - return (year & 0x3) == 0 && /* year % 4 == 0 */ - ((year % 100) != 0 || (year % 400) == 0); -} - -/* - * Adjusts a datetimestruct based on a minutes offset. Assumes - * the current values are valid.g - */ -void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) { - int isleap; - - /* MINUTES */ - dts->min += minutes; - while (dts->min < 0) { - dts->min += 60; - dts->hour--; - } - while (dts->min >= 60) { - dts->min -= 60; - dts->hour++; - } - - /* HOURS */ - while (dts->hour < 0) { - dts->hour += 24; - dts->day--; - } - while (dts->hour >= 24) { - dts->hour -= 24; - dts->day++; - } - - /* DAYS */ - if (dts->day < 1) { - dts->month--; - if (dts->month < 1) { - dts->year--; - dts->month = 12; - } - isleap = is_leapyear(dts->year); - dts->day += days_per_month_table[isleap][dts->month - 1]; - } else if (dts->day > 28) { - isleap = is_leapyear(dts->year); - if (dts->day > days_per_month_table[isleap][dts->month - 1]) { - dts->day -= days_per_month_table[isleap][dts->month - 1]; - dts->month++; - if (dts->month > 12) { - dts->year++; - dts->month = 1; - } - } - } -} - -/* - * Calculates the days offset from the 1970 epoch. - */ -npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { - int i, month; - npy_int64 year, days = 0; - const int *month_lengths; - - year = dts->year - 1970; - days = year * 365; - - /* Adjust for leap years */ - if (days >= 0) { - /* - * 1968 is the closest leap year before 1970. - * Exclude the current year, so add 1. - */ - year += 1; - /* Add one day for each 4 years */ - days += year / 4; - /* 1900 is the closest previous year divisible by 100 */ - year += 68; - /* Subtract one day for each 100 years */ - days -= year / 100; - /* 1600 is the closest previous year divisible by 400 */ - year += 300; - /* Add one day for each 400 years */ - days += year / 400; - } else { - /* - * 1972 is the closest later year after 1970. - * Include the current year, so subtract 2. - */ - year -= 2; - /* Subtract one day for each 4 years */ - days += year / 4; - /* 2000 is the closest later year divisible by 100 */ - year -= 28; - /* Add one day for each 100 years */ - days -= year / 100; - /* 2000 is also the closest later year divisible by 400 */ - /* Subtract one day for each 400 years */ - days += year / 400; - } - - month_lengths = days_per_month_table[is_leapyear(dts->year)]; - month = dts->month - 1; - - /* Add the months */ - for (i = 0; i < month; ++i) { - days += month_lengths[i]; - } - - /* Add the days */ - days += dts->day - 1; - - return days; -} - -/* - * Modifies '*days_' to be the day offset within the year, - * and returns the year. 
- */ -static npy_int64 days_to_yearsdays(npy_int64 *days_) { - const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); - /* Adjust so it's relative to the year 2000 (divisible by 400) */ - npy_int64 days = (*days_) - (365 * 30 + 7); - npy_int64 year; - - /* Break down the 400 year cycle to get the year and day within the year */ - if (days >= 0) { - year = 400 * (days / days_per_400years); - days = days % days_per_400years; - } else { - year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); - days = days % days_per_400years; - if (days < 0) { - days += days_per_400years; - } - } - - /* Work out the year/day within the 400 year cycle */ - if (days >= 366) { - year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); - days = (days - 1) % (100 * 365 + 25 - 1); - if (days >= 365) { - year += 4 * ((days + 1) / (4 * 365 + 1)); - days = (days + 1) % (4 * 365 + 1); - if (days >= 366) { - year += (days - 1) / 365; - days = (days - 1) % 365; - } - } - } - - *days_ = days; - return year + 2000; -} - - -/* - * Fills in the year, month, day in 'dts' based on the days - * offset from 1970. - */ -static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { - const int *month_lengths; - int i; - - dts->year = days_to_yearsdays(&days); - month_lengths = days_per_month_table[is_leapyear(dts->year)]; - - for (i = 0; i < 12; ++i) { - if (days < month_lengths[i]) { - dts->month = i + 1; - dts->day = days + 1; - return; - } else { - days -= month_lengths[i]; - } - } -} - -/* - * Compares two npy_datetimestruct objects chronologically - */ -int cmp_npy_datetimestruct(const npy_datetimestruct *a, - const npy_datetimestruct *b) { - if (a->year > b->year) { - return 1; - } else if (a->year < b->year) { - return -1; - } - - if (a->month > b->month) { - return 1; - } else if (a->month < b->month) { - return -1; - } - - if (a->day > b->day) { - return 1; - } else if (a->day < b->day) { - return -1; - } - - if (a->hour > b->hour) { - return 1; - } else if (a->hour < b->hour) { - return -1; - } - - if (a->min > b->min) { - return 1; - } else if (a->min < b->min) { - return -1; - } - - if (a->sec > b->sec) { - return 1; - } else if (a->sec < b->sec) { - return -1; - } - - if (a->us > b->us) { - return 1; - } else if (a->us < b->us) { - return -1; - } - - if (a->ps > b->ps) { - return 1; - } else if (a->ps < b->ps) { - return -1; - } - - if (a->as > b->as) { - return 1; - } else if (a->as < b->as) { - return -1; - } - - return 0; -} -/* -* Returns the offset from utc of the timezone as a timedelta. -* The caller is responsible for ensuring that the tzinfo -* attribute exists on the datetime object. -* -* If the passed object is timezone naive, Py_None is returned. -* If extraction of the offset fails, NULL is returned. -* -* NOTE: This function is not vendored from numpy. -*/ -PyObject *extract_utc_offset(PyObject *obj) { - PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); - if (tmp == NULL) { - return NULL; - } - if (tmp != Py_None) { - PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); - if (offset == NULL) { - Py_DECREF(tmp); - return NULL; - } - return offset; - } - return tmp; -} - -/* - * Converts a datetime from a datetimestruct to a datetime based - * on a metadata unit. The date is assumed to be valid. 
- */ -npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, - const npy_datetimestruct *dts) { - npy_datetime ret; - - if (base == NPY_FR_Y) { - /* Truncate to the year */ - ret = dts->year - 1970; - } else if (base == NPY_FR_M) { - /* Truncate to the month */ - ret = 12 * (dts->year - 1970) + (dts->month - 1); - } else { - /* Otherwise calculate the number of days to start */ - npy_int64 days = get_datetimestruct_days(dts); - - switch (base) { - case NPY_FR_W: - /* Truncate to weeks */ - if (days >= 0) { - ret = days / 7; - } else { - ret = (days - 6) / 7; - } - break; - case NPY_FR_D: - ret = days; - break; - case NPY_FR_h: - ret = days * 24 + dts->hour; - break; - case NPY_FR_m: - ret = (days * 24 + dts->hour) * 60 + dts->min; - break; - case NPY_FR_s: - ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; - break; - case NPY_FR_ms: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000 + - dts->us / 1000; - break; - case NPY_FR_us: - ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us; - break; - case NPY_FR_ns: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000 + - dts->ps / 1000; - break; - case NPY_FR_ps: - ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps; - break; - case NPY_FR_fs: - /* only 2.6 hours */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000 + - dts->as / 1000; - break; - case NPY_FR_as: - /* only 9.2 secs */ - ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + - dts->sec) * - 1000000 + - dts->us) * - 1000000 + - dts->ps) * - 1000000 + - dts->as; - break; - default: - /* Something got corrupted */ - PyErr_SetString( - PyExc_ValueError, - "NumPy datetime metadata with corrupt unit value"); - return -1; - } - } - return ret; -} - -/* - * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/ - * - * Computes the python `ret, d = divmod(d, unit)`. - * - * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch - * for subsequent calls to this command - it is able to deduce that `*d >= 0`. - */ -npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { - assert(unit > 0); - npy_int64 div = *d / unit; - npy_int64 mod = *d % unit; - if (mod < 0) { - mod += unit; - div -= 1; - } - assert(mod >= 0); - *d = mod; - return div; -} - -/* - * Converts a datetime based on the given metadata into a datetimestruct - */ -void pandas_datetime_to_datetimestruct(npy_datetime dt, - NPY_DATETIMEUNIT base, - npy_datetimestruct *out) { - npy_int64 perday; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->year = 1970; - out->month = 1; - out->day = 1; - - /* - * Note that care must be taken with the / and % operators - * for negative values. 
- */ - switch (base) { - case NPY_FR_Y: - out->year = 1970 + dt; - break; - - case NPY_FR_M: - out->year = 1970 + extract_unit(&dt, 12); - out->month = dt + 1; - break; - - case NPY_FR_W: - /* A week is 7 days */ - set_datetimestruct_days(dt * 7, out); - break; - - case NPY_FR_D: - set_datetimestruct_days(dt, out); - break; - - case NPY_FR_h: - perday = 24LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = dt; - break; - - case NPY_FR_m: - perday = 24LL * 60; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 60); - out->min = (int)dt; - break; - - case NPY_FR_s: - perday = 24LL * 60 * 60; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 60 * 60); - out->min = (int)extract_unit(&dt, 60); - out->sec = (int)dt; - break; - - case NPY_FR_ms: - perday = 24LL * 60 * 60 * 1000; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 60); - out->sec = (int)extract_unit(&dt, 1000LL); - out->us = (int)(dt * 1000); - break; - - case NPY_FR_us: - perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000); - out->us = (int)dt; - break; - - case NPY_FR_ns: - perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_ps: - perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; - - set_datetimestruct_days(extract_unit(&dt, perday), out); - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->us = (int)extract_unit(&dt, 1000LL); - out->ps = (int)(dt * 1000); - break; - - case NPY_FR_fs: - /* entire range is only +- 2.6 hours */ - out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 60 * 60); - if (out->hour < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour += 24; - assert(out->hour >= 0); - } - out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 60); - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000); - out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); - out->ps = (int)extract_unit(&dt, 1000LL); - out->as = (int)(dt * 1000); - break; - - case NPY_FR_as: - /* entire range is only +- 9.2 seconds */ - out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * - 1000 * 1000); - if (out->sec < 0) { - out->year = 1969; - out->month = 12; - out->day = 31; - out->hour = 23; - out->min = 59; - out->sec += 60; - assert(out->sec >= 0); - } - out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); - out->ps = (int)extract_unit(&dt, 1000LL * 1000); - out->as = (int)dt; - break; - - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy datetime metadata is corrupted with invalid " - "base unit"); - } -} - -/* - * Converts a timedelta from a timedeltastruct to a timedelta based - * on a 
metadata unit. The timedelta is assumed to be valid. - * - * Returns 0 on success, -1 on failure. - */ -void pandas_timedelta_to_timedeltastruct(npy_timedelta td, - NPY_DATETIMEUNIT base, - pandas_timedeltastruct *out) { - npy_int64 frac; - npy_int64 sfrac; - npy_int64 ifrac; - int sign; - npy_int64 per_day; - npy_int64 per_sec; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(pandas_timedeltastruct)); - - switch (base) { - case NPY_FR_ns: - - per_day = 86400000000000LL; - per_sec = 1000LL * 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac / (1000LL * 1000LL); - ifrac -= out->ms * 1000LL * 1000LL; - out->us = ifrac / 1000LL; - ifrac -= out->us * 1000LL; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_us: - - per_day = 86400000000LL; - per_sec = 1000LL * 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac / 1000LL; - ifrac -= out->ms * 1000LL; - out->us = ifrac / 1L; - ifrac -= out->us * 1L; - out->ns = ifrac; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_ms: - - per_day = 86400000LL; - per_sec = 1000LL; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 
60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = ifrac; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_s: - // special case where we can simplify many expressions bc per_sec=1 - - per_day = 86400LL; - per_sec = 1L; - - // put frac in seconds - if (td < 0 && td % per_sec != 0) - frac = td / per_sec - 1; - else - frac = td / per_sec; - - if (frac < 0) { - sign = -1; - - // even fraction - if ((-frac % 86400LL) != 0) { - out->days = -frac / 86400LL + 1; - frac += 86400LL * out->days; - } else { - frac = -frac; - } - } else { - sign = 1; - out->days = 0; - } - - if (frac >= 86400) { - out->days += frac / 86400LL; - frac -= out->days * 86400LL; - } - - if (frac >= 3600) { - out->hrs = frac / 3600LL; - frac -= out->hrs * 3600LL; - } else { - out->hrs = 0; - } - - if (frac >= 60) { - out->min = frac / 60LL; - frac -= out->min * 60LL; - } else { - out->min = 0; - } - - if (frac >= 0) { - out->sec = frac; - frac -= out->sec; - } else { - out->sec = 0; - } - - sfrac = (out->hrs * 3600LL + out->min * 60LL - + out->sec) * per_sec; - - if (sign < 0) - out->days = -out->days; - - ifrac = td - (out->days * per_day + sfrac); - - if (ifrac != 0) { - out->ms = 0; - out->us = 0; - out->ns = 0; - } else { - out->ms = 0; - out->us = 0; - out->ns = 0; - } - break; - - case NPY_FR_m: - - out->days = td / 1440LL; - td -= out->days * 1440LL; - out->hrs = td / 60LL; - td -= out->hrs * 60LL; - out->min = td; - - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_h: - out->days = td / 24LL; - td -= out->days * 24LL; - out->hrs = td; - - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_D: - out->days = td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - case NPY_FR_W: - out->days = 7 * td; - out->hrs = 0; - out->min = 0; - out->sec = 0; - out->ms = 0; - out->us = 0; - out->ns = 0; - break; - - default: - PyErr_SetString(PyExc_RuntimeError, - "NumPy timedelta metadata is corrupted with " - "invalid base unit"); - } - - out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; - out->microseconds = out->ms * 1000 + out->us; - out->nanoseconds = out->ns; -} - - -/* - * This function returns a pointer to the DateTimeMetaData - * contained within the provided datetime dtype. - * - * Copied near-verbatim from numpy/core/src/multiarray/datetime.c - */ -PyArray_DatetimeMetaData -get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { - return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); -} diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c deleted file mode 100644 index f1f03e6467eac..0000000000000 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ /dev/null @@ -1,1150 +0,0 @@ -/* - -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Written by Mark Wiebe (mwwiebe@gmail.com) -Copyright (c) 2011 by Enthought, Inc. 
- -Copyright (c) 2005-2011, NumPy Developers -All rights reserved. - -See NUMPY_LICENSE.txt for the license. - -This file implements string parsing and creation for NumPy datetime. - -*/ - -#define PY_SSIZE_T_CLEAN -#define NO_IMPORT - -#ifndef NPY_NO_DEPRECATED_API -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#endif // NPY_NO_DEPRECATED_API - -#include - -#include - -#include -#include -#include - -#include "np_datetime.h" -#include "np_datetime_strings.h" - - -/* - * Parses (almost) standard ISO 8601 date strings. The differences are: - * - * + Only seconds may have a decimal point, with up to 18 digits after it - * (maximum attoseconds precision). - * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate - * the date and the time. Both are treated equivalently. - * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. - * + Doesn't handle leap seconds (seconds value has 60 in these cases). - * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow - * + Accepts special values "NaT" (not a time), "Today", (current - * day according to local time) and "Now" (current time in UTC). - * + ':' separator between hours, minutes, and seconds is optional. When - * omitted, each component must be 2 digits if it appears. (GH-10041) - * - * 'str' must be a NULL-terminated string, and 'len' must be its length. - * - * 'out' gets filled with the parsed date-time. - * 'out_local' gets set to 1 if the parsed time contains timezone, - * to 0 otherwise. - * 'out_tzoffset' gets set to timezone offset by minutes - * if the parsed time was in local time, - * to 0 otherwise. The values 'now' and 'today' don't get counted - * as local, and neither do UTC +/-#### timezone offsets, because - * they aren't using the computer's local timezone offset. - * - * Returns 0 on success, -1 on failure. - */ - -typedef enum { - COMPARISON_SUCCESS, - COMPLETED_PARTIAL_MATCH, - COMPARISON_ERROR -} DatetimePartParseResult; -// This function will advance the pointer on format -// and decrement characters_remaining by n on success -// On failure will return COMPARISON_ERROR without incrementing -// If `format_requirement` is PARTIAL_MATCH, and the `format` string has -// been exhausted, then return COMPLETED_PARTIAL_MATCH. 
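/*
 * A minimal calling sketch for parse_iso_8601_datetime (documented above,
 * defined further below); it leans on the declarations in
 * np_datetime_strings.h and is illustrative only. With INFER_FORMAT the
 * format arguments are never consulted, so NULL/0 can be passed. For the
 * sample string the parser fills out with 2023-01-02 03:04:05.123456,
 * reports NPY_FR_us as the best unit (more than three fractional digits),
 * and sets *out_local = 1 with *out_tzoffset = 330 minutes for the "+05:30"
 * offset; the return value is 0 on success.
 */
static int example_parse_iso_8601(npy_datetimestruct *out) {
    static const char s[] = "2023-01-02 03:04:05.123456+05:30";
    NPY_DATETIMEUNIT bestunit;
    int is_local = 0, tzoffset = 0;

    return parse_iso_8601_datetime(s, (int)sizeof(s) - 1, /*want_exc=*/1, out,
                                   &bestunit, &is_local, &tzoffset,
                                   /*format=*/NULL, /*format_len=*/0,
                                   INFER_FORMAT);
}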
-static DatetimePartParseResult compare_format( - const char **format, - int *characters_remaining, - const char *compare_to, - int n, - const FormatRequirement format_requirement -) { - if (format_requirement == INFER_FORMAT) { - return COMPARISON_SUCCESS; - } - if (*characters_remaining < 0) { - return COMPARISON_ERROR; - } - if (format_requirement == PARTIAL_MATCH && *characters_remaining == 0) { - return COMPLETED_PARTIAL_MATCH; - } - if (*characters_remaining < n) { - // TODO(pandas-dev): PyErr to differentiate what went wrong - return COMPARISON_ERROR; - } else { - if (strncmp(*format, compare_to, n)) { - // TODO(pandas-dev): PyErr to differentiate what went wrong - return COMPARISON_ERROR; - } else { - *format += n; - *characters_remaining -= n; - return COMPARISON_SUCCESS; - } - } - return COMPARISON_SUCCESS; -} - -int parse_iso_8601_datetime(const char *str, int len, int want_exc, - npy_datetimestruct *out, - NPY_DATETIMEUNIT *out_bestunit, - int *out_local, int *out_tzoffset, - const char* format, int format_len, - FormatRequirement format_requirement) { - if (len < 0 || format_len < 0) - goto parse_error; - int year_leap = 0; - int i, numdigits; - const char *substr; - int sublen; - NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; - DatetimePartParseResult comparison; - - /* If year-month-day are separated by a valid separator, - * months/days without leading zeroes will be parsed - * (though not iso8601). If the components aren't separated, - * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are - * forbidden here (but parsed as YYMMDD elsewhere). - */ - int has_ymd_sep = 0; - char ymd_sep = '\0'; - char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; - int valid_ymd_sep_len = sizeof(valid_ymd_sep); - - /* hour-minute-second may or may not separated by ':'. If not, then - * each component must be 2 digits. 
*/ - int has_hms_sep = 0; - int hour_was_2_digits = 0; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->month = 1; - out->day = 1; - - substr = str; - sublen = len; - - /* Skip leading whitespace */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - /* Leading '-' sign for negative year */ - if (*substr == '-') { - ++substr; - --sublen; - } - - if (sublen == 0) { - goto parse_error; - } - - /* PARSE THE YEAR (4 digits) */ - comparison = compare_format(&format, &format_len, "%Y", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - - out->year = 0; - if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && - isdigit(substr[2]) && isdigit(substr[3])) { - out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + - 10 * (substr[2] - '0') + (substr[3] - '0'); - - substr += 4; - sublen -= 4; - } - - /* Negate the year if necessary */ - if (str[0] == '-') { - out->year = -out->year; - } - /* Check whether it's a leap-year */ - year_leap = is_leapyear(out->year); - - /* Next character must be a separator, start of month, or end of string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_Y; - goto finish; - } - - if (!isdigit(*substr)) { - for (i = 0; i < valid_ymd_sep_len; ++i) { - if (*substr == valid_ymd_sep[i]) { - break; - } - } - if (i == valid_ymd_sep_len) { - goto parse_error; - } - has_ymd_sep = 1; - ymd_sep = valid_ymd_sep[i]; - ++substr; - --sublen; - - comparison = compare_format(&format, &format_len, &ymd_sep, 1, - format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* Cannot have trailing separator */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - } - - /* PARSE THE MONTH */ - comparison = compare_format(&format, &format_len, "%m", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - out->month = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->month = 10 * out->month + (*substr - '0'); - ++substr; - --sublen; - } else if (!has_ymd_sep) { - goto parse_error; - } - if (out->month < 1 || out->month > 12) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Month out of range in datetime string \"%s\"", str); - } - goto error; - } - - /* Next character must be the separator, start of day, or end of string */ - if (sublen == 0) { - bestunit = NPY_FR_M; - /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. 
*/ - if (!has_ymd_sep) { - goto parse_error; - } - if (format_len) { - goto parse_error; - } - if (out_local != NULL) { - *out_local = 0; - } - goto finish; - } - - if (has_ymd_sep) { - /* Must have separator, but cannot be trailing */ - if (*substr != ymd_sep || sublen == 1) { - goto parse_error; - } - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, &ymd_sep, 1, - format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - /* PARSE THE DAY */ - comparison = compare_format(&format, &format_len, "%d", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - if (!isdigit(*substr)) { - goto parse_error; - } - out->day = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->day = 10 * out->day + (*substr - '0'); - ++substr; - --sublen; - } else if (!has_ymd_sep) { - goto parse_error; - } - if (out->day < 1 || - out->day > days_per_month_table[year_leap][out->month - 1]) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Day out of range in datetime string \"%s\"", str); - } - goto error; - } - - /* Next character must be a 'T', ' ', or end of string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_D; - goto finish; - } - - if ((*substr != 'T' && *substr != ' ') || sublen == 1) { - goto parse_error; - } - comparison = compare_format(&format, &format_len, substr, 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - ++substr; - --sublen; - - /* PARSE THE HOURS */ - comparison = compare_format(&format, &format_len, "%H", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - if (!isdigit(*substr)) { - goto parse_error; - } - out->hour = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional */ - if (isdigit(*substr)) { - hour_was_2_digits = 1; - out->hour = 10 * out->hour + (*substr - '0'); - ++substr; - --sublen; - if (out->hour >= 24) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Hours out of range in datetime string \"%s\"", - str); - } - goto error; - } - } - - /* Next character must be a ':' or the end of the string */ - if (sublen == 0) { - if (!hour_was_2_digits) { - goto parse_error; - } - if (format_len) { - goto parse_error; - } - bestunit = NPY_FR_h; - goto finish; - } - - if (*substr == ':') { - has_hms_sep = 1; - ++substr; - --sublen; - /* Cannot have a trailing separator */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - comparison = compare_format(&format, &format_len, ":", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } else if (!isdigit(*substr)) { - if (!hour_was_2_digits) { - goto parse_error; - } - goto parse_timezone; - } - - /* PARSE THE MINUTES */ - comparison = compare_format(&format, &format_len, "%M", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First 
digit required */ - out->min = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->min = 10 * out->min + (*substr - '0'); - ++substr; - --sublen; - if (out->min >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Minutes out of range in datetime string \"%s\"", - str); - } - goto error; - } - } else if (!has_hms_sep) { - goto parse_error; - } - - if (sublen == 0) { - bestunit = NPY_FR_m; - if (format_len) { - goto parse_error; - } - goto finish; - } - - /* If we make it through this condition block, then the next - * character is a digit. */ - if (has_hms_sep && *substr == ':') { - comparison = compare_format(&format, &format_len, ":", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - ++substr; - --sublen; - /* Cannot have a trailing ':' */ - if (sublen == 0 || !isdigit(*substr)) { - goto parse_error; - } - } else if (!has_hms_sep && isdigit(*substr)) { - } else { - goto parse_timezone; - } - - /* PARSE THE SECONDS */ - comparison = compare_format(&format, &format_len, "%S", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* First digit required */ - out->sec = (*substr - '0'); - ++substr; - --sublen; - /* Second digit optional if there was a separator */ - if (isdigit(*substr)) { - out->sec = 10 * out->sec + (*substr - '0'); - ++substr; - --sublen; - if (out->sec >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Seconds out of range in datetime string \"%s\"", - str); - } - goto error; - } - } else if (!has_hms_sep) { - goto parse_error; - } - - /* Next character may be a '.' 
indicating fractional seconds */ - if (sublen > 0 && *substr == '.') { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, ".", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } else { - bestunit = NPY_FR_s; - goto parse_timezone; - } - - /* PARSE THE MICROSECONDS (0 to 6 digits) */ - comparison = compare_format(&format, &format_len, "%f", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->us *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->us += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (sublen == 0 || !isdigit(*substr)) { - if (numdigits > 3) { - bestunit = NPY_FR_us; - } else { - bestunit = NPY_FR_ms; - } - goto parse_timezone; - } - - /* PARSE THE PICOSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->ps *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->ps += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (sublen == 0 || !isdigit(*substr)) { - if (numdigits > 3) { - bestunit = NPY_FR_ps; - } else { - bestunit = NPY_FR_ns; - } - goto parse_timezone; - } - - /* PARSE THE ATTOSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->as *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->as += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (numdigits > 3) { - bestunit = NPY_FR_as; - } else { - bestunit = NPY_FR_fs; - } - -parse_timezone: - /* trim any whitespace between time/timezone */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - if (sublen == 0) { - // Unlike NumPy, treating no time zone as naive - if (format_len > 0) { - goto parse_error; - } - goto finish; - } - - /* UTC specifier */ - if (*substr == 'Z') { - comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* "Z" should be equivalent to tz offset "+00:00" */ - if (out_local != NULL) { - *out_local = 1; - } - - if (out_tzoffset != NULL) { - *out_tzoffset = 0; - } - - if (sublen == 1) { - if (format_len > 0) { - goto parse_error; - } - goto finish; - } else { - ++substr; - --sublen; - } - } else if (*substr == '-' || *substr == '+') { - comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - /* Time zone offset */ - int offset_neg = 0, offset_hour = 0, offset_minute = 0; - - /* - * Since "local" means local with respect to the current - * machine, we say this is non-local. 
- */ - - if (*substr == '-') { - offset_neg = 1; - } - ++substr; - --sublen; - - /* The hours offset */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); - substr += 2; - sublen -= 2; - if (offset_hour >= 24) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Timezone hours offset out of range " - "in datetime string \"%s\"", - str); - } - goto error; - } - } else if (sublen >= 1 && isdigit(substr[0])) { - offset_hour = substr[0] - '0'; - ++substr; - --sublen; - } else { - goto parse_error; - } - - /* The minutes offset is optional */ - if (sublen > 0) { - /* Optional ':' */ - if (*substr == ':') { - ++substr; - --sublen; - } - - /* The minutes offset (at the end of the string) */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); - substr += 2; - sublen -= 2; - if (offset_minute >= 60) { - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Timezone minutes offset out of range " - "in datetime string \"%s\"", - str); - } - goto error; - } - } else if (sublen >= 1 && isdigit(substr[0])) { - offset_minute = substr[0] - '0'; - ++substr; - --sublen; - } else { - goto parse_error; - } - } - - /* Apply the time zone offset */ - if (offset_neg) { - offset_hour = -offset_hour; - offset_minute = -offset_minute; - } - if (out_local != NULL) { - *out_local = 1; - // Unlike NumPy, do not change internal value to local time - *out_tzoffset = 60 * offset_hour + offset_minute; - } - } - - /* Skip trailing whitespace */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - comparison = compare_format(&format, &format_len, " ", 1, format_requirement); - if (comparison == COMPARISON_ERROR) { - goto parse_error; - } else if (comparison == COMPLETED_PARTIAL_MATCH) { - goto finish; - } - } - - if ((sublen != 0) || (format_len != 0)) { - goto parse_error; - } - -finish: - if (out_bestunit != NULL) { - *out_bestunit = bestunit; - } - return 0; - -parse_error: - if (want_exc) { - PyErr_Format(PyExc_ValueError, - "Error parsing datetime string \"%s\" at position %d", str, - (int)(substr - str)); - } - return -1; - -error: - return -1; -} - -/* - * Provides a string length to use for converting datetime - * objects with the given local and unit settings. - */ -int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { - int len = 0; - - switch (base) { - /* Generic units can only be used to represent NaT */ - /* return 4;*/ - case NPY_FR_as: - len += 3; /* "###" */ - case NPY_FR_fs: - len += 3; /* "###" */ - case NPY_FR_ps: - len += 3; /* "###" */ - case NPY_FR_ns: - len += 3; /* "###" */ - case NPY_FR_us: - len += 3; /* "###" */ - case NPY_FR_ms: - len += 4; /* ".###" */ - case NPY_FR_s: - len += 3; /* ":##" */ - case NPY_FR_m: - len += 3; /* ":##" */ - case NPY_FR_h: - len += 3; /* "T##" */ - case NPY_FR_D: - case NPY_FR_W: - len += 3; /* "-##" */ - case NPY_FR_M: - len += 3; /* "-##" */ - case NPY_FR_Y: - len += 21; /* 64-bit year */ - break; - default: - len += 3; /* handle the now defunct NPY_FR_B */ - break; - } - - if (base >= NPY_FR_h) { - if (local) { - len += 5; /* "+####" or "-####" */ - } else { - len += 1; /* "Z" */ - } - } - - len += 1; /* NULL terminator */ - - return len; -} - - -/* - * Converts an npy_datetimestruct to an (almost) ISO 8601 - * NULL-terminated string using timezone Z (UTC). If the string fits in - * the space exactly, it leaves out the NULL terminator and returns success. 
- * - * The differences from ISO 8601 are the 'NaT' string, and - * the number of year digits is >= 4 instead of strictly 4. - * - * 'base' restricts the output to that unit. Set 'base' to - * -1 to auto-detect a base after which all the values are zero. - * - * Returns 0 on success, -1 on failure (for example if the output - * string was too short). - */ -int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, - int utc, NPY_DATETIMEUNIT base) { - char *substr = outstr; - int sublen = outlen; - int tmplen; - - /* - * Print weeks with the same precision as days. - * - * TODO: Could print weeks with YYYY-Www format if the week - * epoch is a Monday. - */ - if (base == NPY_FR_W) { - base = NPY_FR_D; - } - -/* YEAR */ -/* - * Can't use PyOS_snprintf, because it always produces a '\0' - * character at the end, and NumPy string types are permitted - * to have data all the way to the end of the buffer. - */ -#ifdef _WIN32 - tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); -#else - tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); -#endif // _WIN32 - /* If it ran out of space or there isn't space for the NULL terminator */ - if (tmplen < 0 || tmplen > sublen) { - goto string_too_short; - } - substr += tmplen; - sublen -= tmplen; - - /* Stop if the unit is years */ - if (base == NPY_FR_Y) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } - - /* MONTH */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '-'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->month / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->month % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is months */ - if (base == NPY_FR_M) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } - - /* DAY */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '-'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->day / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->day % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is days */ - if (base == NPY_FR_D) { - if (sublen > 0) { - *substr = '\0'; - } - return 0; - } - - /* HOUR */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = 'T'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->hour / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->hour % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is hours */ - if (base == NPY_FR_h) { - goto add_time_zone; - } - - /* MINUTE */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = ':'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->min / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->min % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is minutes */ - if (base == NPY_FR_m) { - goto add_time_zone; - } - - /* SECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = ':'; - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->sec / 10) + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->sec % 10) + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is seconds */ - if (base == NPY_FR_s) { - goto add_time_zone; - } - - /* MILLISECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = '.'; - if (sublen < 2) { - 
goto string_too_short; - } - substr[1] = (char)((dts->us / 100000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->us / 10000) % 10 + '0'); - if (sublen < 4) { - goto string_too_short; - } - substr[3] = (char)((dts->us / 1000) % 10 + '0'); - substr += 4; - sublen -= 4; - - /* Stop if the unit is milliseconds */ - if (base == NPY_FR_ms) { - goto add_time_zone; - } - - /* MICROSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->us / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->us / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->us % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is microseconds */ - if (base == NPY_FR_us) { - goto add_time_zone; - } - - /* NANOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->ps / 100000) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->ps / 10000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->ps / 1000) % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is nanoseconds */ - if (base == NPY_FR_ns) { - goto add_time_zone; - } - - /* PICOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->ps / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->ps / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->ps % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is picoseconds */ - if (base == NPY_FR_ps) { - goto add_time_zone; - } - - /* FEMTOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->as / 100000) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->as / 10000) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)((dts->as / 1000) % 10 + '0'); - substr += 3; - sublen -= 3; - - /* Stop if the unit is femtoseconds */ - if (base == NPY_FR_fs) { - goto add_time_zone; - } - - /* ATTOSECOND */ - if (sublen < 1) { - goto string_too_short; - } - substr[0] = (char)((dts->as / 100) % 10 + '0'); - if (sublen < 2) { - goto string_too_short; - } - substr[1] = (char)((dts->as / 10) % 10 + '0'); - if (sublen < 3) { - goto string_too_short; - } - substr[2] = (char)(dts->as % 10 + '0'); - substr += 3; - sublen -= 3; - -add_time_zone: - /* UTC "Zulu" time */ - if (utc) { - if (sublen < 1) { - goto string_too_short; - } - substr[0] = 'Z'; - substr += 1; - sublen -= 1; - } - /* Add a NULL terminator, and return */ - if (sublen > 0) { - substr[0] = '\0'; - } - - return 0; - -string_too_short: - PyErr_Format(PyExc_RuntimeError, - "The string provided for NumPy ISO datetime formatting " - "was too short, with length %d", - outlen); - return -1; -} - - -int make_iso_8601_timedelta(pandas_timedeltastruct *tds, - char *outstr, size_t *outlen) { - *outlen = 0; - *outlen += snprintf(outstr, 60, // NOLINT - "P%" NPY_INT64_FMT - "DT%" NPY_INT32_FMT - "H%" NPY_INT32_FMT - "M%" NPY_INT32_FMT, - tds->days, tds->hrs, tds->min, tds->sec); - outstr += *outlen; - - if (tds->ns != 0) { - *outlen += snprintf(outstr, 12, // NOLINT - ".%03" NPY_INT32_FMT - "%03" NPY_INT32_FMT - "%03" NPY_INT32_FMT - "S", tds->ms, tds->us, tds->ns); - } else if (tds->us != 0) { - *outlen += snprintf(outstr, 9, // NOLINT - ".%03" NPY_INT32_FMT - "%03" NPY_INT32_FMT - "S", 
tds->ms, tds->us); - } else if (tds->ms != 0) { - *outlen += snprintf(outstr, 6, // NOLINT - ".%03" NPY_INT32_FMT "S", tds->ms); - } else { - *outlen += snprintf(outstr, 2, // NOLINT - "%s", "S"); - } - - return 0; -} diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c deleted file mode 100644 index 98b6073d7a488..0000000000000 --- a/pandas/_libs/tslibs/src/datetime/pd_datetime.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Copyright (c) 2005-2011, NumPy Developers -All rights reserved. - -This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt - -*/ - -#define _PANDAS_DATETIME_IMPL - -#define PY_SSIZE_T_CLEAN -#include - -#include "datetime.h" -#include "pd_datetime.h" - - -static void pandas_datetime_destructor(PyObject *op) { - void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME); - PyMem_Free(ptr); -} - -/* - * - * Converts a Python datetime.datetime or datetime.date - * object into a NumPy npy_datetimestruct. Uses tzinfo (if present) - * to convert to UTC time. - * - * The following implementation just asks for attributes, and thus - * supports datetime duck typing. The tzinfo time zone conversion - * requires this style of access as well. - * - * Returns -1 on error, 0 on success, and 1 (with no error set) - * if obj doesn't have the needed date or datetime attributes. - */ -static int convert_pydatetime_to_datetimestruct(PyObject *dtobj, - npy_datetimestruct *out) { - // Assumes that obj is a valid datetime object - PyObject *tmp; - PyObject *obj = (PyObject*)dtobj; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->month = 1; - out->day = 1; - - out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year")); - out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month")); - out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day")); - - // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use - // PyDateTime_Check here, and less verbose attribute lookups. - - /* Check for time attributes (if not there, return success as a date) */ - if (!PyObject_HasAttrString(obj, "hour") || - !PyObject_HasAttrString(obj, "minute") || - !PyObject_HasAttrString(obj, "second") || - !PyObject_HasAttrString(obj, "microsecond")) { - return 0; - } - - out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour")); - out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute")); - out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second")); - out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond")); - - if (PyObject_HasAttrString(obj, "tzinfo")) { - PyObject *offset = extract_utc_offset(obj); - /* Apply the time zone offset if datetime obj is tz-aware */ - if (offset != NULL) { - if (offset == Py_None) { - Py_DECREF(offset); - return 0; - } - PyObject *tmp_int; - int seconds_offset, minutes_offset; - /* - * The timedelta should have a function "total_seconds" - * which contains the value we want. 
- */ - tmp = PyObject_CallMethod(offset, "total_seconds", ""); - Py_DECREF(offset); - if (tmp == NULL) { - return -1; - } - tmp_int = PyNumber_Long(tmp); - if (tmp_int == NULL) { - Py_DECREF(tmp); - return -1; - } - seconds_offset = PyLong_AsLong(tmp_int); - if (seconds_offset == -1 && PyErr_Occurred()) { - Py_DECREF(tmp_int); - Py_DECREF(tmp); - return -1; - } - Py_DECREF(tmp_int); - Py_DECREF(tmp); - - /* Convert to a minutes offset and apply it */ - minutes_offset = seconds_offset / 60; - - add_minutes_to_datetimestruct(out, -minutes_offset); - } - } - - return 0; -} - -// Converts a Python object representing a Date / Datetime to ISO format -// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z -// while base="ns" yields "2020-01-01T00:00:00.000000000Z" -// len is mutated to save the length of the returned string -static char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, - size_t *len) { - npy_datetimestruct dts; - int ret; - - ret = convert_pydatetime_to_datetimestruct(obj, &dts); - if (ret != 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Could not convert PyDateTime to numpy datetime"); - } - return NULL; - } - - *len = (size_t)get_datetime_iso_8601_strlen(0, base); - char *result = PyObject_Malloc(*len); - // Check to see if PyDateTime has a timezone. - // Don't convert to UTC if it doesn't. - int is_tz_aware = 0; - if (PyObject_HasAttrString(obj, "tzinfo")) { - PyObject *offset = extract_utc_offset(obj); - if (offset == NULL) { - PyObject_Free(result); - return NULL; - } - is_tz_aware = offset != Py_None; - Py_DECREF(offset); - } - ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base); - - if (ret != 0) { - PyErr_SetString(PyExc_ValueError, - "Could not convert datetime value to string"); - PyObject_Free(result); - return NULL; - } - - // Note that get_datetime_iso_8601_strlen just gives a generic size - // for ISO string conversion, not the actual size used - *len = strlen(result); - return result; -} - -// Convert a Python Date/Datetime to Unix epoch with resolution base -static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { - npy_datetimestruct dts; - int ret; - - ret = convert_pydatetime_to_datetimestruct(dt, &dts); - if (ret != 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Could not convert PyDateTime to numpy datetime"); - } - // TODO(username): is setting errMsg required? 
- // ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; - // return NULL; - } - - npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); - return NpyDateTimeToEpoch(npy_dt, base); -} - -static int pandas_datetime_exec(PyObject *module) { - PyDateTime_IMPORT; - PandasDateTime_CAPI *capi = PyMem_Malloc(sizeof(PandasDateTime_CAPI)); - if (capi == NULL) { - PyErr_NoMemory(); - return -1; - } - capi->npy_datetimestruct_to_datetime = npy_datetimestruct_to_datetime; - capi->scaleNanosecToUnit = scaleNanosecToUnit; - capi->int64ToIso = int64ToIso; - capi->NpyDateTimeToEpoch = NpyDateTimeToEpoch; - capi->PyDateTimeToIso = PyDateTimeToIso; - capi->PyDateTimeToEpoch = PyDateTimeToEpoch; - capi->int64ToIsoDuration = int64ToIsoDuration; - capi->pandas_datetime_to_datetimestruct = pandas_datetime_to_datetimestruct; - capi->pandas_timedelta_to_timedeltastruct = - pandas_timedelta_to_timedeltastruct; - capi->convert_pydatetime_to_datetimestruct = - convert_pydatetime_to_datetimestruct; - capi->cmp_npy_datetimestruct = cmp_npy_datetimestruct; - capi->get_datetime_metadata_from_dtype = get_datetime_metadata_from_dtype; - capi->parse_iso_8601_datetime = parse_iso_8601_datetime; - capi->get_datetime_iso_8601_strlen = get_datetime_iso_8601_strlen; - capi->make_iso_8601_datetime = make_iso_8601_datetime; - capi->make_iso_8601_timedelta = make_iso_8601_timedelta; - - PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, - pandas_datetime_destructor); - if (capsule == NULL) { - PyMem_Free(capi); - return -1; - } - - // Monkeypatch the top level pandas module to have an attribute for the - // C-API. This is required because Python capsules do not support setting - // this attribute on anything but the top level package. Ideally not - // done when cpython gh-6898 gets implemented - PyObject *pandas = PyImport_ImportModule("pandas"); - if (!pandas) { - PyErr_SetString(PyExc_ImportError, - "pd_datetime.c could not import module pandas"); - Py_DECREF(capsule); - return -1; - } - - if (PyModule_AddObject(pandas, "_pandas_datetime_CAPI", capsule) < 0) { - Py_DECREF(capsule); - return -1; - } - - return 0; -} - -static PyModuleDef_Slot pandas_datetime_slots[] = { - {Py_mod_exec, pandas_datetime_exec}, {0, NULL}}; - -static struct PyModuleDef pandas_datetimemodule = { - PyModuleDef_HEAD_INIT, - .m_name = "pandas._libs.pandas_datetime", - - .m_doc = "Internal module with datetime support for other extensions", - .m_size = 0, - .m_methods = NULL, - .m_slots = pandas_datetime_slots}; - -PyMODINIT_FUNC PyInit_pandas_datetime(void) { - PyDateTime_IMPORT; - return PyModuleDef_Init(&pandas_datetimemodule); -} diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index ef05b00a994a7..59d99494ed208 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -26,7 +26,7 @@ import cython from pandas._libs.algos import is_monotonic -cdef extern from "../src/skiplist.h": +cdef extern from "pandas/skiplist.h": ctypedef struct node_t: node_t **next int *width diff --git a/setup.py b/setup.py index ee444f1aaeb85..e285299cfc05e 100755 --- a/setup.py +++ b/setup.py @@ -115,15 +115,14 @@ def initialize_options(self): self._clean_trees = [] base = pjoin("pandas", "_libs", "src") - tsbase = pjoin("pandas", "_libs", "tslibs", "src") - dt = pjoin(tsbase, "datetime") - util = pjoin("pandas", "util") parser = pjoin(base, "parser") - ujson_python = pjoin(base, "ujson", "python") - ujson_lib = pjoin(base, "ujson", "lib") + vendored = pjoin(base, 
"vendored") + dt = pjoin(base, "datetime") + ujson_python = pjoin(vendored, "ujson", "python") + ujson_lib = pjoin(vendored, "ujson", "lib") self._clean_exclude = [ - pjoin(dt, "np_datetime.c"), - pjoin(dt, "np_datetime_strings.c"), + pjoin(vendored, "numpy", "datetime", "np_datetime.c"), + pjoin(vendored, "numpy", "datetime", "np_datetime_strings.c"), pjoin(dt, "date_conversions.c"), pjoin(parser, "tokenizer.c"), pjoin(parser, "io.c"), @@ -132,9 +131,8 @@ def initialize_options(self): pjoin(ujson_python, "JSONtoObj.c"), pjoin(ujson_lib, "ultrajsonenc.c"), pjoin(ujson_lib, "ultrajsondec.c"), - pjoin(util, "move.c"), - pjoin(tsbase, "datetime", "pd_datetime.c"), - pjoin("pandas", "_libs", "pd_parser.c"), + pjoin(dt, "pd_datetime.c"), + pjoin(parser, "pd_parser.c"), ] for root, dirs, files in os.walk("pandas"): @@ -431,19 +429,15 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): return pjoin("pandas", subdir, name + suffix) -lib_depends = ["pandas/_libs/src/parse_helper.h"] +lib_depends = ["pandas/_libs/include/pandas/parse_helper.h"] -klib_include = ["pandas/_libs/src/klib"] - -tseries_includes = ["pandas/_libs/tslibs/src/datetime"] tseries_depends = [ - "pandas/_libs/tslibs/src/datetime/pd_datetime.h", + "pandas/_libs/include/pandas/datetime/pd_datetime.h", ] ext_data = { "_libs.algos": { "pyxfile": "_libs/algos", - "include": klib_include, "depends": _pxi_dep["algos"], }, "_libs.arrays": {"pyxfile": "_libs/arrays"}, @@ -451,34 +445,32 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.hashing": {"pyxfile": "_libs/hashing", "depends": []}, "_libs.hashtable": { "pyxfile": "_libs/hashtable", - "include": klib_include, "depends": ( - ["pandas/_libs/src/klib/khash_python.h", "pandas/_libs/src/klib/khash.h"] + [ + "pandas/_libs/include/pandas/vendored/klib/khash_python.h", + "pandas/_libs/include/pandas/vendored/klib/khash.h", + ] + _pxi_dep["hashtable"] ), }, "_libs.index": { "pyxfile": "_libs/index", - "include": klib_include, "depends": _pxi_dep["index"], }, "_libs.indexing": {"pyxfile": "_libs/indexing"}, "_libs.internals": {"pyxfile": "_libs/internals"}, "_libs.interval": { "pyxfile": "_libs/interval", - "include": klib_include, "depends": _pxi_dep["interval"], }, - "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, + "_libs.join": {"pyxfile": "_libs/join"}, "_libs.lib": { "pyxfile": "_libs/lib", "depends": lib_depends + tseries_depends, - "include": klib_include, # due to tokenizer import }, "_libs.missing": {"pyxfile": "_libs/missing", "depends": tseries_depends}, "_libs.parsers": { "pyxfile": "_libs/parsers", - "include": klib_include + ["pandas/_libs/src", "pandas/_libs"], "depends": [ "pandas/_libs/src/parser/tokenizer.h", "pandas/_libs/src/parser/io.h", @@ -500,7 +492,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.conversion": { "pyxfile": "_libs/tslibs/conversion", "depends": tseries_depends, - "include": klib_include, }, "_libs.tslibs.fields": { "pyxfile": "_libs/tslibs/fields", @@ -510,17 +501,13 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.np_datetime": { "pyxfile": "_libs/tslibs/np_datetime", "depends": tseries_depends, - "includes": tseries_includes, }, "_libs.tslibs.offsets": { "pyxfile": "_libs/tslibs/offsets", "depends": tseries_depends, - "includes": tseries_includes, }, "_libs.tslibs.parsing": { "pyxfile": "_libs/tslibs/parsing", - "include": tseries_includes + klib_include, - "depends": ["pandas/_libs/src/parser/tokenizer.h"], "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, 
"_libs.tslibs.period": { @@ -537,7 +524,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): }, "_libs.tslibs.timestamps": { "pyxfile": "_libs/tslibs/timestamps", - "include": tseries_includes, "depends": tseries_depends, }, "_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones"}, @@ -554,7 +540,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pyxfile": "_libs/window/aggregations", "language": "c++", "suffix": ".cpp", - "depends": ["pandas/_libs/src/skiplist.h"], + "depends": ["pandas/_libs/include/pandas/skiplist.h"], }, "_libs.window.indexers": {"pyxfile": "_libs/window/indexers"}, "_libs.writers": {"pyxfile": "_libs/writers"}, @@ -571,8 +557,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): sources.extend(data.get("sources", [])) - include = data.get("include", []) - include.append(numpy.get_include()) + include = ["pandas/_libs/include", numpy.get_include()] undef_macros = [] @@ -612,24 +597,22 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): ujson_ext = Extension( "pandas._libs.json", depends=[ - "pandas/_libs/src/ujson/lib/ultrajson.h", - "pandas/_libs/tslibs/src/datetime/pd_datetime.h", + "pandas/_libs/include/pandas/vendored/ujson/lib/ultrajson.h", + "pandas/_libs/include/pandas/datetime/pd_datetime.h", ], sources=( [ - "pandas/_libs/src/ujson/python/ujson.c", - "pandas/_libs/src/ujson/python/objToJSON.c", - "pandas/_libs/src/ujson/python/JSONtoObj.c", - "pandas/_libs/src/ujson/lib/ultrajsonenc.c", - "pandas/_libs/src/ujson/lib/ultrajsondec.c", + "pandas/_libs/src/vendored/ujson/python/ujson.c", + "pandas/_libs/src/vendored/ujson/python/objToJSON.c", + "pandas/_libs/src/vendored/ujson/python/JSONtoObj.c", + "pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c", + "pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c", ] ), include_dirs=[ - "pandas/_libs/src/ujson/python", - "pandas/_libs/src/ujson/lib", + "pandas/_libs/include", numpy.get_include(), - ] - + tseries_includes, + ], extra_compile_args=(extra_compile_args), extra_link_args=extra_link_args, define_macros=macros, @@ -647,14 +630,14 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): depends=["pandas/_libs/tslibs/datetime/pd_datetime.h"], sources=( [ - "pandas/_libs/tslibs/src/datetime/np_datetime.c", - "pandas/_libs/tslibs/src/datetime/np_datetime_strings.c", - "pandas/_libs/tslibs/src/datetime/date_conversions.c", - "pandas/_libs/tslibs/src/datetime/pd_datetime.c", + "pandas/_libs/src/vendored/numpy/datetime/np_datetime.c", + "pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c", + "pandas/_libs/src/datetime/date_conversions.c", + "pandas/_libs/src/datetime/pd_datetime.c", ] ), - include_dirs=tseries_includes - + [ + include_dirs=[ + "pandas/_libs/include", numpy.get_include(), ], extra_compile_args=(extra_compile_args), @@ -671,16 +654,16 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): # pd_datetime pd_parser_ext = Extension( "pandas._libs.pandas_parser", - depends=["pandas/_libs/pd_parser.h"], + depends=["pandas/_libs/include/pandas/parser/pd_parser.h"], sources=( [ "pandas/_libs/src/parser/tokenizer.c", "pandas/_libs/src/parser/io.c", - "pandas/_libs/pd_parser.c", + "pandas/_libs/src/parser/pd_parser.c", ] ), include_dirs=[ - "pandas/_libs/src/klib", + "pandas/_libs/include", ], extra_compile_args=(extra_compile_args), extra_link_args=extra_link_args, From 5eee4b2067dfeaa3cd16d9292366f0474ed2686f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 22 May 2023 16:44:29 -0700 Subject: [PATCH 2/4] Meson updates --- pandas/_libs/meson.build | 48 
+++++++++++++-------------------- pandas/_libs/tslibs/meson.build | 37 +++++++++---------------- pandas/_libs/window/meson.build | 4 +-- pandas/meson.build | 3 +-- 4 files changed, 34 insertions(+), 58 deletions(-) diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 382247c63c1ad..5e59f15d0d089 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -61,44 +61,33 @@ subdir('tslibs') libs_sources = { # Dict of extension name -> dict of {sources, include_dirs, and deps} # numpy include dir is implicitly included - 'algos': {'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper, _khash_primitive_helper], - 'include_dirs': klib_include}, + 'algos': {'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper, _khash_primitive_helper]}, 'arrays': {'sources': ['arrays.pyx']}, 'groupby': {'sources': ['groupby.pyx']}, 'hashing': {'sources': ['hashing.pyx']}, - 'hashtable': {'sources': ['hashtable.pyx', _khash_primitive_helper, _hashtable_class_helper, _hashtable_func_helper], - 'include_dirs': klib_include}, - 'index': {'sources': ['index.pyx', _index_class_helper], - 'include_dirs': [klib_include, 'tslibs']}, + 'hashtable': {'sources': ['hashtable.pyx', _khash_primitive_helper, _hashtable_class_helper, _hashtable_func_helper]}, + 'index': {'sources': ['index.pyx', _index_class_helper]}, 'indexing': {'sources': ['indexing.pyx']}, 'internals': {'sources': ['internals.pyx']}, - 'interval': {'sources': ['interval.pyx', _intervaltree_helper], - 'include_dirs': [klib_include, 'tslibs']}, + 'interval': {'sources': ['interval.pyx', _intervaltree_helper]}, 'join': {'sources': ['join.pyx', _khash_primitive_helper], - 'include_dirs': klib_include, 'deps': _khash_primitive_helper_dep}, - 'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c'], - 'include_dirs': [klib_include, inc_datetime]}, - 'missing': {'sources': ['missing.pyx'], - 'include_dirs': [inc_datetime]}, - 'pandas_datetime': {'sources': ['tslibs/src/datetime/np_datetime.c', - 'tslibs/src/datetime/np_datetime_strings.c', - 'tslibs/src/datetime/date_conversions.c', - 'tslibs/src/datetime/pd_datetime.c']}, - #'include_dirs': + 'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c']}, + 'missing': {'sources': ['missing.pyx']}, + 'pandas_datetime': {'sources': ['src/vendored/numpy/datetime/np_datetime.c', + 'src/vendored/numpy/datetime/np_datetime_strings.c', + 'src/datetime/date_conversions.c', + 'src/datetime/pd_datetime.c']}, 'pandas_parser': {'sources': ['src/parser/tokenizer.c', 'src/parser/io.c', - 'pd_parser.c'], - 'include_dirs': [klib_include]}, + 'src/parser/pd_parser.c']}, 'parsers': {'sources': ['parsers.pyx', 'src/parser/tokenizer.c', 'src/parser/io.c'], - 'include_dirs': [klib_include, 'src'], 'deps': _khash_primitive_helper_dep}, - 'json': {'sources': ['src/ujson/python/ujson.c', - 'src/ujson/python/objToJSON.c', - 'src/ujson/python/JSONtoObj.c', - 'src/ujson/lib/ultrajsonenc.c', - 'src/ujson/lib/ultrajsondec.c'], - 'include_dirs': ['tslibs/src/datetime', 'src/ujson/lib', 'src/ujson/python']}, + 'json': {'sources': ['src/vendored/ujson/python/ujson.c', + 'src/vendored/ujson/python/objToJSON.c', + 'src/vendored/ujson/python/JSONtoObj.c', + 'src/vendored/ujson/lib/ultrajsonenc.c', + 'src/vendored/ujson/lib/ultrajsondec.c']}, 'ops': {'sources': ['ops.pyx']}, 'ops_dispatch': {'sources': ['ops_dispatch.pyx']}, 'properties': {'sources': ['properties.pyx']}, @@ -106,8 +95,7 @@ libs_sources = { 'sas': {'sources': ['sas.pyx']}, 'byteswap': {'sources': ['byteswap.pyx']}, 'sparse': 
{'sources': ['sparse.pyx', _sparse_op_helper]}, - 'tslib': {'sources': ['tslib.pyx'], - 'include_dirs': inc_datetime}, + 'tslib': {'sources': ['tslib.pyx']}, 'testing': {'sources': ['testing.pyx']}, 'writers': {'sources': ['writers.pyx']} } @@ -118,7 +106,7 @@ foreach ext_name, ext_dict : libs_sources ext_name, ext_dict.get('sources'), cython_args: ['--include-dir', meson.current_build_dir()], - include_directories: [inc_np] + ext_dict.get('include_dirs', ''), + include_directories: [inc_np, inc_pd], dependencies: ext_dict.get('deps', ''), subdir: 'pandas/_libs', install: true diff --git a/pandas/_libs/tslibs/meson.build b/pandas/_libs/tslibs/meson.build index fc8c9e609c416..4a51f8dc1e461 100644 --- a/pandas/_libs/tslibs/meson.build +++ b/pandas/_libs/tslibs/meson.build @@ -4,30 +4,19 @@ tslibs_sources = { 'base': {'sources': ['base.pyx']}, 'ccalendar': {'sources': ['ccalendar.pyx']}, 'dtypes': {'sources': ['dtypes.pyx']}, - 'conversion': {'sources': ['conversion.pyx', 'src/datetime/np_datetime.c'], - 'include_dirs': inc_datetime}, - 'fields': {'sources': ['fields.pyx', 'src/datetime/np_datetime.c']}, + 'conversion': {'sources': ['conversion.pyx']}, + 'fields': {'sources': ['fields.pyx']}, 'nattype': {'sources': ['nattype.pyx']}, - 'np_datetime': {'sources': ['np_datetime.pyx', 'src/datetime/np_datetime.c', 'src/datetime/np_datetime_strings.c'], - 'include_dirs': inc_datetime}, - 'offsets': {'sources': ['offsets.pyx', 'src/datetime/np_datetime.c'], - 'include_dirs': inc_datetime}, - 'parsing': {'sources': ['parsing.pyx', '../src/parser/tokenizer.c'], - 'include_dirs': klib_include}, - 'period': {'sources': ['period.pyx', 'src/datetime/np_datetime.c'], - 'include_dirs': inc_datetime}, - 'strptime': {'sources': ['strptime.pyx', 'src/datetime/np_datetime.c'], - 'include_dirs': inc_datetime}, - 'timedeltas': {'sources': ['timedeltas.pyx', 'src/datetime/np_datetime.c'], - 'include_dirs': inc_datetime}, - 'timestamps': {'sources': ['timestamps.pyx', 'src/datetime/np_datetime.c'], - 'include_dirs': inc_datetime}, - 'timezones': {'sources': ['timezones.pyx', 'src/datetime/np_datetime.c'], - 'include_dirs': inc_datetime}, - 'tzconversion': {'sources': ['tzconversion.pyx', 'src/datetime/np_datetime.c'], - 'include_dirs': inc_datetime}, - 'vectorized': {'sources': ['vectorized.pyx', 'src/datetime/np_datetime.c'], - 'include_dirs': inc_datetime} + 'np_datetime': {'sources': ['np_datetime.pyx']}, + 'offsets': {'sources': ['offsets.pyx']}, + 'parsing': {'sources': ['parsing.pyx', '../src/parser/tokenizer.c']}, + 'period': {'sources': ['period.pyx']}, + 'strptime': {'sources': ['strptime.pyx']}, + 'timedeltas': {'sources': ['timedeltas.pyx']}, + 'timestamps': {'sources': ['timestamps.pyx']}, + 'timezones': {'sources': ['timezones.pyx']}, + 'tzconversion': {'sources': ['tzconversion.pyx']}, + 'vectorized': {'sources': ['vectorized.pyx']}, } foreach ext_name, ext_dict : tslibs_sources @@ -35,7 +24,7 @@ foreach ext_name, ext_dict : tslibs_sources ext_name, ext_dict.get('sources'), cython_args: ['--include-dir', meson.current_build_dir()], - include_directories: [inc_np] + ext_dict.get('include_dirs', ''), + include_directories: [inc_np, inc_pd], dependencies: ext_dict.get('deps', ''), subdir: 'pandas/_libs/tslibs', install: true diff --git a/pandas/_libs/window/meson.build b/pandas/_libs/window/meson.build index 7d7c34a57c6a6..61719a35b2346 100644 --- a/pandas/_libs/window/meson.build +++ b/pandas/_libs/window/meson.build @@ -1,7 +1,7 @@ py.extension_module( 'aggregations', ['aggregations.pyx'], - 
include_directories: [inc_np, '../src'], + include_directories: [inc_np, inc_pd], dependencies: [py_dep], subdir: 'pandas/_libs/window', override_options : ['cython_language=cpp'], @@ -11,7 +11,7 @@ py.extension_module( py.extension_module( 'indexers', ['indexers.pyx'], - include_directories: [inc_np], + include_directories: [inc_np, inc_pd], dependencies: [py_dep], subdir: 'pandas/_libs/window', install: true diff --git a/pandas/meson.build b/pandas/meson.build index 491a08e6c0261..ab84fd688b762 100644 --- a/pandas/meson.build +++ b/pandas/meson.build @@ -7,8 +7,7 @@ incdir_numpy = run_command(py, ).stdout().strip() inc_np = include_directories(incdir_numpy) -klib_include = include_directories('_libs/src/klib') -inc_datetime = include_directories('_libs/tslibs') +inc_pd = include_directories('_libs/include') fs.copyfile('__init__.py') From db1b64fc0ad8148c72eead6799b6d1ec81b1660c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 22 May 2023 16:47:02 -0700 Subject: [PATCH 3/4] fixed missing source files --- pandas/_libs/src/datetime/date_conversions.c | 100 + pandas/_libs/src/datetime/pd_datetime.c | 253 ++ pandas/_libs/src/parser/pd_parser.c | 178 ++ .../src/vendored/numpy/datetime/np_datetime.c | 947 ++++++++ .../numpy/datetime/np_datetime_strings.c | 1150 +++++++++ .../src/vendored/ujson/lib/ultrajsondec.c | 1208 ++++++++++ .../src/vendored/ujson/lib/ultrajsonenc.c | 1207 ++++++++++ .../src/vendored/ujson/python/JSONtoObj.c | 520 ++++ .../src/vendored/ujson/python/objToJSON.c | 2135 +++++++++++++++++ .../_libs/src/vendored/ujson/python/ujson.c | 451 ++++ 10 files changed, 8149 insertions(+) create mode 100644 pandas/_libs/src/datetime/date_conversions.c create mode 100644 pandas/_libs/src/datetime/pd_datetime.c create mode 100644 pandas/_libs/src/parser/pd_parser.c create mode 100644 pandas/_libs/src/vendored/numpy/datetime/np_datetime.c create mode 100644 pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c create mode 100644 pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c create mode 100644 pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c create mode 100644 pandas/_libs/src/vendored/ujson/python/JSONtoObj.c create mode 100644 pandas/_libs/src/vendored/ujson/python/objToJSON.c create mode 100644 pandas/_libs/src/vendored/ujson/python/ujson.c diff --git a/pandas/_libs/src/datetime/date_conversions.c b/pandas/_libs/src/datetime/date_conversions.c new file mode 100644 index 0000000000000..84fc5507010ed --- /dev/null +++ b/pandas/_libs/src/datetime/date_conversions.c @@ -0,0 +1,100 @@ +/* +Copyright (c) 2020, PyData Development Team +All rights reserved. +Distributed under the terms of the BSD Simplified License. +The full license is in the LICENSE file, distributed with this software. +*/ + +// Conversion routines that are useful for serialization, +// but which don't interact with JSON objects directly + +#include "pandas/datetime/date_conversions.h" +#include "pandas/vendored/numpy/datetime/np_datetime.h" +#include "pandas/vendored/numpy/datetime/np_datetime_strings.h" + +/* + * Function: scaleNanosecToUnit + * ----------------------------- + * + * Scales an integer value representing time in nanoseconds to provided unit. + * + * Mutates the provided value directly. Returns 0 on success, non-zero on error. 
+ */ +int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { + switch (unit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + *value /= 1000LL; + break; + case NPY_FR_ms: + *value /= 1000000LL; + break; + case NPY_FR_s: + *value /= 1000000000LL; + break; + default: + return -1; + } + + return 0; +} + +/* Converts the int64_t representation of a datetime to ISO; mutates len */ +char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { + npy_datetimestruct dts; + int ret_code; + + pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + // datetime64 is always naive + ret_code = make_iso_8601_datetime(&dts, result, *len, 0, base); + if (ret_code != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { + scaleNanosecToUnit(&dt, base); + return dt; +} + +/* Converts the int64_t representation of a duration to ISO; mutates len */ +char *int64ToIsoDuration(int64_t value, size_t *len) { + pandas_timedeltastruct tds; + int ret_code; + + pandas_timedelta_to_timedeltastruct(value, NPY_FR_ns, &tds); + + // Max theoretical length of ISO Duration with 64 bit day + // as the largest unit is 70 characters + 1 for a null terminator + char *result = PyObject_Malloc(71); + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + + ret_code = make_iso_8601_timedelta(&tds, result, len); + if (ret_code == -1) { + PyErr_SetString(PyExc_ValueError, + "Could not convert timedelta value to string"); + PyObject_Free(result); + return NULL; + } + + return result; +} diff --git a/pandas/_libs/src/datetime/pd_datetime.c b/pandas/_libs/src/datetime/pd_datetime.c new file mode 100644 index 0000000000000..fc2cbcab90174 --- /dev/null +++ b/pandas/_libs/src/datetime/pd_datetime.c @@ -0,0 +1,253 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt + +*/ + +#define _PANDAS_DATETIME_IMPL + +#define PY_SSIZE_T_CLEAN +#include + +#include "datetime.h" +#include "pandas/datetime/pd_datetime.h" + + +static void pandas_datetime_destructor(PyObject *op) { + void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME); + PyMem_Free(ptr); +} + +/* + * + * Converts a Python datetime.datetime or datetime.date + * object into a NumPy npy_datetimestruct. Uses tzinfo (if present) + * to convert to UTC time. + * + * The following implementation just asks for attributes, and thus + * supports datetime duck typing. The tzinfo time zone conversion + * requires this style of access as well. + * + * Returns -1 on error, 0 on success, and 1 (with no error set) + * if obj doesn't have the needed date or datetime attributes. 
+ */ +static int convert_pydatetime_to_datetimestruct(PyObject *dtobj, + npy_datetimestruct *out) { + // Assumes that obj is a valid datetime object + PyObject *tmp; + PyObject *obj = (PyObject*)dtobj; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year")); + out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month")); + out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day")); + + // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use + // PyDateTime_Check here, and less verbose attribute lookups. + + /* Check for time attributes (if not there, return success as a date) */ + if (!PyObject_HasAttrString(obj, "hour") || + !PyObject_HasAttrString(obj, "minute") || + !PyObject_HasAttrString(obj, "second") || + !PyObject_HasAttrString(obj, "microsecond")) { + return 0; + } + + out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour")); + out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute")); + out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second")); + out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond")); + + if (PyObject_HasAttrString(obj, "tzinfo")) { + PyObject *offset = extract_utc_offset(obj); + /* Apply the time zone offset if datetime obj is tz-aware */ + if (offset != NULL) { + if (offset == Py_None) { + Py_DECREF(offset); + return 0; + } + PyObject *tmp_int; + int seconds_offset, minutes_offset; + /* + * The timedelta should have a function "total_seconds" + * which contains the value we want. + */ + tmp = PyObject_CallMethod(offset, "total_seconds", ""); + Py_DECREF(offset); + if (tmp == NULL) { + return -1; + } + tmp_int = PyNumber_Long(tmp); + if (tmp_int == NULL) { + Py_DECREF(tmp); + return -1; + } + seconds_offset = PyLong_AsLong(tmp_int); + if (seconds_offset == -1 && PyErr_Occurred()) { + Py_DECREF(tmp_int); + Py_DECREF(tmp); + return -1; + } + Py_DECREF(tmp_int); + Py_DECREF(tmp); + + /* Convert to a minutes offset and apply it */ + minutes_offset = seconds_offset / 60; + + add_minutes_to_datetimestruct(out, -minutes_offset); + } + } + + return 0; +} + +// Converts a Python object representing a Date / Datetime to ISO format +// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z +// while base="ns" yields "2020-01-01T00:00:00.000000000Z" +// len is mutated to save the length of the returned string +static char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, + size_t *len) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(obj, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + return NULL; + } + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + // Check to see if PyDateTime has a timezone. + // Don't convert to UTC if it doesn't. 
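/*
 * Worked example for the timezone handling below (illustrative): for an
 * aware input such as datetime(2020, 1, 1, 12, tzinfo=timezone(timedelta(hours=5)))
 * the struct was already shifted to UTC by convert_pydatetime_to_datetimestruct
 * above, is_tz_aware becomes 1, and the result gains a trailing "Z", e.g.
 * "2020-01-01T07:00:00Z" at base "s"; a naive datetime(2020, 1, 1, 12) keeps
 * its wall time and gets no suffix, "2020-01-01T12:00:00".
 */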
+ int is_tz_aware = 0; + if (PyObject_HasAttrString(obj, "tzinfo")) { + PyObject *offset = extract_utc_offset(obj); + if (offset == NULL) { + PyObject_Free(result); + return NULL; + } + is_tz_aware = offset != Py_None; + Py_DECREF(offset); + } + ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base); + + if (ret != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + return NULL; + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +// Convert a Python Date/Datetime to Unix epoch with resolution base +static npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(dt, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + // TODO(username): is setting errMsg required? + // ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + // return NULL; + } + + npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); + return NpyDateTimeToEpoch(npy_dt, base); +} + +static int pandas_datetime_exec(PyObject *module) { + PyDateTime_IMPORT; + PandasDateTime_CAPI *capi = PyMem_Malloc(sizeof(PandasDateTime_CAPI)); + if (capi == NULL) { + PyErr_NoMemory(); + return -1; + } + capi->npy_datetimestruct_to_datetime = npy_datetimestruct_to_datetime; + capi->scaleNanosecToUnit = scaleNanosecToUnit; + capi->int64ToIso = int64ToIso; + capi->NpyDateTimeToEpoch = NpyDateTimeToEpoch; + capi->PyDateTimeToIso = PyDateTimeToIso; + capi->PyDateTimeToEpoch = PyDateTimeToEpoch; + capi->int64ToIsoDuration = int64ToIsoDuration; + capi->pandas_datetime_to_datetimestruct = pandas_datetime_to_datetimestruct; + capi->pandas_timedelta_to_timedeltastruct = + pandas_timedelta_to_timedeltastruct; + capi->convert_pydatetime_to_datetimestruct = + convert_pydatetime_to_datetimestruct; + capi->cmp_npy_datetimestruct = cmp_npy_datetimestruct; + capi->get_datetime_metadata_from_dtype = get_datetime_metadata_from_dtype; + capi->parse_iso_8601_datetime = parse_iso_8601_datetime; + capi->get_datetime_iso_8601_strlen = get_datetime_iso_8601_strlen; + capi->make_iso_8601_datetime = make_iso_8601_datetime; + capi->make_iso_8601_timedelta = make_iso_8601_timedelta; + + PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, + pandas_datetime_destructor); + if (capsule == NULL) { + PyMem_Free(capi); + return -1; + } + + // Monkeypatch the top level pandas module to have an attribute for the + // C-API. This is required because Python capsules do not support setting + // this attribute on anything but the top level package. 
Ideally not + // done when cpython gh-6898 gets implemented + PyObject *pandas = PyImport_ImportModule("pandas"); + if (!pandas) { + PyErr_SetString(PyExc_ImportError, + "pd_datetime.c could not import module pandas"); + Py_DECREF(capsule); + return -1; + } + + if (PyModule_AddObject(pandas, "_pandas_datetime_CAPI", capsule) < 0) { + Py_DECREF(capsule); + return -1; + } + + return 0; +} + +static PyModuleDef_Slot pandas_datetime_slots[] = { + {Py_mod_exec, pandas_datetime_exec}, {0, NULL}}; + +static struct PyModuleDef pandas_datetimemodule = { + PyModuleDef_HEAD_INIT, + .m_name = "pandas._libs.pandas_datetime", + + .m_doc = "Internal module with datetime support for other extensions", + .m_size = 0, + .m_methods = NULL, + .m_slots = pandas_datetime_slots}; + +PyMODINIT_FUNC PyInit_pandas_datetime(void) { + PyDateTime_IMPORT; + return PyModuleDef_Init(&pandas_datetimemodule); +} diff --git a/pandas/_libs/src/parser/pd_parser.c b/pandas/_libs/src/parser/pd_parser.c new file mode 100644 index 0000000000000..c429f17c1cb8b --- /dev/null +++ b/pandas/_libs/src/parser/pd_parser.c @@ -0,0 +1,178 @@ +/* + +Copyright (c) 2023, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +*/ +#define _PANDAS_PARSER_IMPL + +#include "pandas/parser/pd_parser.h" +#include "pandas/parser/io.h" + +static int to_double(char *item, double *p_value, char sci, char decimal, + int *maybe_int) { + char *p_end = NULL; + int error = 0; + + /* Switch to precise xstrtod GH 31364 */ + *p_value = + precise_xstrtod(item, &p_end, decimal, sci, '\0', 1, &error, maybe_int); + + return (error == 0) && (!*p_end); +} + +static int floatify(PyObject *str, double *result, int *maybe_int) { + int status; + char *data; + PyObject *tmp = NULL; + const char sci = 'E'; + const char dec = '.'; + + if (PyBytes_Check(str)) { + data = PyBytes_AS_STRING(str); + } else if (PyUnicode_Check(str)) { + tmp = PyUnicode_AsUTF8String(str); + if (tmp == NULL) { + return -1; + } + data = PyBytes_AS_STRING(tmp); + } else { + PyErr_SetString(PyExc_TypeError, "Invalid object type"); + return -1; + } + + status = to_double(data, result, sci, dec, maybe_int); + + if (!status) { + /* handle inf/-inf infinity/-infinity */ + if (strlen(data) == 3) { + if (0 == strcasecmp(data, "inf")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 4) { + if (0 == strcasecmp(data, "-inf")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+inf")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 8) { + if (0 == strcasecmp(data, "infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 9) { + if (0 == strcasecmp(data, "-infinity")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else { + goto parsingerror; + } + } + + Py_XDECREF(tmp); + return 0; + +parsingerror: + PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data); + Py_XDECREF(tmp); + return -1; +} + + +static void pandas_parser_destructor(PyObject *op) { + void *ptr = PyCapsule_GetPointer(op, PandasParser_CAPSULE_NAME); + PyMem_Free(ptr); +} + +static int pandas_parser_exec(PyObject *module) { + PandasParser_CAPI *capi = PyMem_Malloc(sizeof(PandasParser_CAPI)); + if (capi == NULL) { + PyErr_NoMemory(); + return -1; + } + + 
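+ /* Fill in every slot of the parser C-API table. A consuming extension would + typically locate the table roughly like this (illustrative sketch; the + attribute and capsule names below come from this patch): + + PyObject *pandas = PyImport_ImportModule("pandas"); + PyObject *caps = PyObject_GetAttrString(pandas, "_pandas_parser_CAPI"); + PandasParser_CAPI *api = PyCapsule_GetPointer(caps, PandasParser_CAPSULE_NAME); + + ...and then call api->to_double(...), api->parser_init(...), etc. */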
capi->to_double = to_double; + capi->floatify = floatify; + capi->new_rd_source = new_rd_source; + capi->del_rd_source = del_rd_source; + capi->buffer_rd_bytes = buffer_rd_bytes; + capi->uint_state_init = uint_state_init; + capi->uint64_conflict = uint64_conflict; + capi->coliter_setup = coliter_setup; + capi->parser_new = parser_new; + capi->parser_init = parser_init; + capi->parser_free = parser_free; + capi->parser_del = parser_del; + capi->parser_add_skiprow = parser_add_skiprow; + capi->parser_set_skipfirstnrows = parser_set_skipfirstnrows; + capi->parser_set_default_options = parser_set_default_options; + capi->parser_consume_rows = parser_consume_rows; + capi->parser_trim_buffers = parser_trim_buffers; + capi->tokenize_all_rows = tokenize_all_rows; + capi->tokenize_nrows = tokenize_nrows; + capi->str_to_int64 = str_to_int64; + capi->str_to_uint64 = str_to_uint64; + capi->xstrtod = xstrtod; + capi->precise_xstrtod = precise_xstrtod; + capi->round_trip = round_trip; + capi->to_boolean = to_boolean; + + PyObject *capsule = + PyCapsule_New(capi, PandasParser_CAPSULE_NAME, pandas_parser_destructor); + if (capsule == NULL) { + PyMem_Free(capi); + return -1; + } + + // Monkeypatch the top level pandas module to have an attribute for the + // C-API. This is required because Python capsules do not support setting + // this attribute on anything but the top level package. Ideally not + // done when cpython gh-6898 gets implemented + PyObject *pandas = PyImport_ImportModule("pandas"); + if (!pandas) { + PyErr_SetString(PyExc_ImportError, + "pd_parser.c could not import module pandas"); + Py_DECREF(capsule); + return -1; + } + + if (PyModule_AddObject(pandas, "_pandas_parser_CAPI", capsule) < 0) { + Py_DECREF(capsule); + return -1; + } + + return 0; +} + +static PyModuleDef_Slot pandas_parser_slots[] = { + {Py_mod_exec, pandas_parser_exec}, {0, NULL}}; + +static struct PyModuleDef pandas_parsermodule = { + PyModuleDef_HEAD_INIT, + .m_name = "pandas._libs.pandas_parser", + + .m_doc = "Internal module with parser support for other extensions", + .m_size = 0, + .m_methods = NULL, + .m_slots = pandas_parser_slots}; + +PyMODINIT_FUNC PyInit_pandas_parser(void) { + return PyModuleDef_Init(&pandas_parsermodule); +} diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c new file mode 100644 index 0000000000000..7e5cb53cf8f62 --- /dev/null +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c @@ -0,0 +1,947 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt + +*/ + +#define NO_IMPORT + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include + +#include +#include +#include +#include "pandas/vendored/numpy/datetime/np_datetime.h" + + +const int days_per_month_table[2][12] = { + {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; + +/* + * Returns 1 if the given year is a leap year, 0 otherwise. + */ +int is_leapyear(npy_int64 year) { + return (year & 0x3) == 0 && /* year % 4 == 0 */ + ((year % 100) != 0 || (year % 400) == 0); +} + +/* + * Adjusts a datetimestruct based on a minutes offset. 
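+ * (for example, an offset of -90 applied to 2020-03-01 00:30 rolls the struct + * back to 2020-02-29 23:00).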
Assumes + * the current values are valid.g + */ +void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) { + int isleap; + + /* MINUTES */ + dts->min += minutes; + while (dts->min < 0) { + dts->min += 60; + dts->hour--; + } + while (dts->min >= 60) { + dts->min -= 60; + dts->hour++; + } + + /* HOURS */ + while (dts->hour < 0) { + dts->hour += 24; + dts->day--; + } + while (dts->hour >= 24) { + dts->hour -= 24; + dts->day++; + } + + /* DAYS */ + if (dts->day < 1) { + dts->month--; + if (dts->month < 1) { + dts->year--; + dts->month = 12; + } + isleap = is_leapyear(dts->year); + dts->day += days_per_month_table[isleap][dts->month - 1]; + } else if (dts->day > 28) { + isleap = is_leapyear(dts->year); + if (dts->day > days_per_month_table[isleap][dts->month - 1]) { + dts->day -= days_per_month_table[isleap][dts->month - 1]; + dts->month++; + if (dts->month > 12) { + dts->year++; + dts->month = 1; + } + } + } +} + +/* + * Calculates the days offset from the 1970 epoch. + */ +npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { + int i, month; + npy_int64 year, days = 0; + const int *month_lengths; + + year = dts->year - 1970; + days = year * 365; + + /* Adjust for leap years */ + if (days >= 0) { + /* + * 1968 is the closest leap year before 1970. + * Exclude the current year, so add 1. + */ + year += 1; + /* Add one day for each 4 years */ + days += year / 4; + /* 1900 is the closest previous year divisible by 100 */ + year += 68; + /* Subtract one day for each 100 years */ + days -= year / 100; + /* 1600 is the closest previous year divisible by 400 */ + year += 300; + /* Add one day for each 400 years */ + days += year / 400; + } else { + /* + * 1972 is the closest later year after 1970. + * Include the current year, so subtract 2. + */ + year -= 2; + /* Subtract one day for each 4 years */ + days += year / 4; + /* 2000 is the closest later year divisible by 100 */ + year -= 28; + /* Add one day for each 100 years */ + days -= year / 100; + /* 2000 is also the closest later year divisible by 400 */ + /* Subtract one day for each 400 years */ + days += year / 400; + } + + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + month = dts->month - 1; + + /* Add the months */ + for (i = 0; i < month; ++i) { + days += month_lengths[i]; + } + + /* Add the days */ + days += dts->day - 1; + + return days; +} + +/* + * Modifies '*days_' to be the day offset within the year, + * and returns the year. 
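+ * (days == 0, the 1970 epoch, maps to year 1970 with *days_ left at 0).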
+ */ +static npy_int64 days_to_yearsdays(npy_int64 *days_) { + const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1); + /* Adjust so it's relative to the year 2000 (divisible by 400) */ + npy_int64 days = (*days_) - (365 * 30 + 7); + npy_int64 year; + + /* Break down the 400 year cycle to get the year and day within the year */ + if (days >= 0) { + year = 400 * (days / days_per_400years); + days = days % days_per_400years; + } else { + year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); + days = days % days_per_400years; + if (days < 0) { + days += days_per_400years; + } + } + + /* Work out the year/day within the 400 year cycle */ + if (days >= 366) { + year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); + days = (days - 1) % (100 * 365 + 25 - 1); + if (days >= 365) { + year += 4 * ((days + 1) / (4 * 365 + 1)); + days = (days + 1) % (4 * 365 + 1); + if (days >= 366) { + year += (days - 1) / 365; + days = (days - 1) % 365; + } + } + } + + *days_ = days; + return year + 2000; +} + + +/* + * Fills in the year, month, day in 'dts' based on the days + * offset from 1970. + */ +static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) { + const int *month_lengths; + int i; + + dts->year = days_to_yearsdays(&days); + month_lengths = days_per_month_table[is_leapyear(dts->year)]; + + for (i = 0; i < 12; ++i) { + if (days < month_lengths[i]) { + dts->month = i + 1; + dts->day = days + 1; + return; + } else { + days -= month_lengths[i]; + } + } +} + +/* + * Compares two npy_datetimestruct objects chronologically + */ +int cmp_npy_datetimestruct(const npy_datetimestruct *a, + const npy_datetimestruct *b) { + if (a->year > b->year) { + return 1; + } else if (a->year < b->year) { + return -1; + } + + if (a->month > b->month) { + return 1; + } else if (a->month < b->month) { + return -1; + } + + if (a->day > b->day) { + return 1; + } else if (a->day < b->day) { + return -1; + } + + if (a->hour > b->hour) { + return 1; + } else if (a->hour < b->hour) { + return -1; + } + + if (a->min > b->min) { + return 1; + } else if (a->min < b->min) { + return -1; + } + + if (a->sec > b->sec) { + return 1; + } else if (a->sec < b->sec) { + return -1; + } + + if (a->us > b->us) { + return 1; + } else if (a->us < b->us) { + return -1; + } + + if (a->ps > b->ps) { + return 1; + } else if (a->ps < b->ps) { + return -1; + } + + if (a->as > b->as) { + return 1; + } else if (a->as < b->as) { + return -1; + } + + return 0; +} +/* +* Returns the offset from utc of the timezone as a timedelta. +* The caller is responsible for ensuring that the tzinfo +* attribute exists on the datetime object. +* +* If the passed object is timezone naive, Py_None is returned. +* If extraction of the offset fails, NULL is returned. +* +* NOTE: This function is not vendored from numpy. +*/ +PyObject *extract_utc_offset(PyObject *obj) { + PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo"); + if (tmp == NULL) { + return NULL; + } + if (tmp != Py_None) { + PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj); + if (offset == NULL) { + Py_DECREF(tmp); + return NULL; + } + return offset; + } + return tmp; +} + +/* + * Converts a datetime from a datetimestruct to a datetime based + * on a metadata unit. The date is assumed to be valid. 
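+ * (e.g. 1970-01-02T00:00 with base NPY_FR_h yields 24, and with NPY_FR_D yields 1).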
+ */ +npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, + const npy_datetimestruct *dts) { + npy_datetime ret; + + if (base == NPY_FR_Y) { + /* Truncate to the year */ + ret = dts->year - 1970; + } else if (base == NPY_FR_M) { + /* Truncate to the month */ + ret = 12 * (dts->year - 1970) + (dts->month - 1); + } else { + /* Otherwise calculate the number of days to start */ + npy_int64 days = get_datetimestruct_days(dts); + + switch (base) { + case NPY_FR_W: + /* Truncate to weeks */ + if (days >= 0) { + ret = days / 7; + } else { + ret = (days - 6) / 7; + } + break; + case NPY_FR_D: + ret = days; + break; + case NPY_FR_h: + ret = days * 24 + dts->hour; + break; + case NPY_FR_m: + ret = (days * 24 + dts->hour) * 60 + dts->min; + break; + case NPY_FR_s: + ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec; + break; + case NPY_FR_ms: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000 + + dts->us / 1000; + break; + case NPY_FR_us: + ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us; + break; + case NPY_FR_ns: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000 + + dts->ps / 1000; + break; + case NPY_FR_ps: + ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps; + break; + case NPY_FR_fs: + /* only 2.6 hours */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000 + + dts->as / 1000; + break; + case NPY_FR_as: + /* only 9.2 secs */ + ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 + + dts->sec) * + 1000000 + + dts->us) * + 1000000 + + dts->ps) * + 1000000 + + dts->as; + break; + default: + /* Something got corrupted */ + PyErr_SetString( + PyExc_ValueError, + "NumPy datetime metadata with corrupt unit value"); + return -1; + } + } + return ret; +} + +/* + * Port numpy#13188 https://github.com/numpy/numpy/pull/13188/ + * + * Computes the python `ret, d = divmod(d, unit)`. + * + * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch + * for subsequent calls to this command - it is able to deduce that `*d >= 0`. + */ +npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) { + assert(unit > 0); + npy_int64 div = *d / unit; + npy_int64 mod = *d % unit; + if (mod < 0) { + mod += unit; + div -= 1; + } + assert(mod >= 0); + *d = mod; + return div; +} + +/* + * Converts a datetime based on the given metadata into a datetimestruct + */ +void pandas_datetime_to_datetimestruct(npy_datetime dt, + NPY_DATETIMEUNIT base, + npy_datetimestruct *out) { + npy_int64 perday; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->year = 1970; + out->month = 1; + out->day = 1; + + /* + * Note that care must be taken with the / and % operators + * for negative values. 
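+ * extract_unit above performs the floored divmod used here, so e.g. dt == -1 + * with base NPY_FR_ns comes out as 1969-12-31T23:59:59.999999999.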
+ */ + switch (base) { + case NPY_FR_Y: + out->year = 1970 + dt; + break; + + case NPY_FR_M: + out->year = 1970 + extract_unit(&dt, 12); + out->month = dt + 1; + break; + + case NPY_FR_W: + /* A week is 7 days */ + set_datetimestruct_days(dt * 7, out); + break; + + case NPY_FR_D: + set_datetimestruct_days(dt, out); + break; + + case NPY_FR_h: + perday = 24LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = dt; + break; + + case NPY_FR_m: + perday = 24LL * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60); + out->min = (int)dt; + break; + + case NPY_FR_s: + perday = 24LL * 60 * 60; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 60 * 60); + out->min = (int)extract_unit(&dt, 60); + out->sec = (int)dt; + break; + + case NPY_FR_ms: + perday = 24LL * 60 * 60 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 60); + out->sec = (int)extract_unit(&dt, 1000LL); + out->us = (int)(dt * 1000); + break; + + case NPY_FR_us: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000); + out->us = (int)dt; + break; + + case NPY_FR_ns: + perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_ps: + perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000; + + set_datetimestruct_days(extract_unit(&dt, perday), out); + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60); + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->us = (int)extract_unit(&dt, 1000LL); + out->ps = (int)(dt * 1000); + break; + + case NPY_FR_fs: + /* entire range is only +- 2.6 hours */ + out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60 * 60); + if (out->hour < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour += 24; + assert(out->hour >= 0); + } + out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 60); + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000); + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL); + out->as = (int)(dt * 1000); + break; + + case NPY_FR_as: + /* entire range is only +- 9.2 seconds */ + out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * + 1000 * 1000); + if (out->sec < 0) { + out->year = 1969; + out->month = 12; + out->day = 31; + out->hour = 23; + out->min = 59; + out->sec += 60; + assert(out->sec >= 0); + } + out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000); + out->ps = (int)extract_unit(&dt, 1000LL * 1000); + out->as = (int)dt; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy datetime metadata is corrupted with invalid " + "base unit"); + } +} + +/* + * Converts a timedelta from a timedeltastruct to a timedelta based + * on a 
metadata unit. The timedelta is assumed to be valid. + * + * Returns 0 on success, -1 on failure. + */ +void pandas_timedelta_to_timedeltastruct(npy_timedelta td, + NPY_DATETIMEUNIT base, + pandas_timedeltastruct *out) { + npy_int64 frac; + npy_int64 sfrac; + npy_int64 ifrac; + int sign; + npy_int64 per_day; + npy_int64 per_sec; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(pandas_timedeltastruct)); + + switch (base) { + case NPY_FR_ns: + + per_day = 86400000000000LL; + per_sec = 1000LL * 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / (1000LL * 1000LL); + ifrac -= out->ms * 1000LL * 1000LL; + out->us = ifrac / 1000LL; + ifrac -= out->us * 1000LL; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_us: + + per_day = 86400000000LL; + per_sec = 1000LL * 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / 1000LL; + ifrac -= out->ms * 1000LL; + out->us = ifrac / 1L; + ifrac -= out->us * 1L; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_ms: + + per_day = 86400000LL; + per_sec = 1000LL; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 
60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = ifrac; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_s: + // special case where we can simplify many expressions bc per_sec=1 + + per_day = 86400LL; + per_sec = 1L; + + // put frac in seconds + if (td < 0 && td % per_sec != 0) + frac = td / per_sec - 1; + else + frac = td / per_sec; + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * per_sec; + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * per_day + sfrac); + + if (ifrac != 0) { + out->ms = 0; + out->us = 0; + out->ns = 0; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + break; + + case NPY_FR_m: + + out->days = td / 1440LL; + td -= out->days * 1440LL; + out->hrs = td / 60LL; + td -= out->hrs * 60LL; + out->min = td; + + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_h: + out->days = td / 24LL; + td -= out->days * 24LL; + out->hrs = td; + + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_D: + out->days = td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_W: + out->days = 7 * td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy timedelta metadata is corrupted with " + "invalid base unit"); + } + + out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; + out->microseconds = out->ms * 1000 + out->us; + out->nanoseconds = out->ns; +} + + +/* + * This function returns a pointer to the DateTimeMetaData + * contained within the provided datetime dtype. + * + * Copied near-verbatim from numpy/core/src/multiarray/datetime.c + */ +PyArray_DatetimeMetaData +get_datetime_metadata_from_dtype(PyArray_Descr *dtype) { + return (((PyArray_DatetimeDTypeMetaData *)dtype->c_metadata)->meta); +} diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c new file mode 100644 index 0000000000000..629d88ca6f589 --- /dev/null +++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c @@ -0,0 +1,1150 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Written by Mark Wiebe (mwwiebe@gmail.com) +Copyright (c) 2011 by Enthought, Inc. 
+ +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +See NUMPY_LICENSE.txt for the license. + +This file implements string parsing and creation for NumPy datetime. + +*/ + +#define PY_SSIZE_T_CLEAN +#define NO_IMPORT + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include + +#include + +#include +#include +#include + +#include "pandas/vendored/numpy/datetime/np_datetime.h" +#include "pandas/vendored/numpy/datetime/np_datetime_strings.h" + + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + ':' separator between hours, minutes, and seconds is optional. When + * omitted, each component must be 2 digits if it appears. (GH-10041) + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets set to 1 if the parsed time contains timezone, + * to 0 otherwise. + * 'out_tzoffset' gets set to timezone offset by minutes + * if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * + * Returns 0 on success, -1 on failure. + */ + +typedef enum { + COMPARISON_SUCCESS, + COMPLETED_PARTIAL_MATCH, + COMPARISON_ERROR +} DatetimePartParseResult; +// This function will advance the pointer on format +// and decrement characters_remaining by n on success +// On failure will return COMPARISON_ERROR without incrementing +// If `format_requirement` is PARTIAL_MATCH, and the `format` string has +// been exhausted, then return COMPLETED_PARTIAL_MATCH. 
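+// For example, with *format == "%Y-%m-%d" and *characters_remaining == 8, comparing +// against "%Y" (n == 2) succeeds, advancing *format to "-%m-%d" and leaving 6 characters remaining.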
+static DatetimePartParseResult compare_format( + const char **format, + int *characters_remaining, + const char *compare_to, + int n, + const FormatRequirement format_requirement +) { + if (format_requirement == INFER_FORMAT) { + return COMPARISON_SUCCESS; + } + if (*characters_remaining < 0) { + return COMPARISON_ERROR; + } + if (format_requirement == PARTIAL_MATCH && *characters_remaining == 0) { + return COMPLETED_PARTIAL_MATCH; + } + if (*characters_remaining < n) { + // TODO(pandas-dev): PyErr to differentiate what went wrong + return COMPARISON_ERROR; + } else { + if (strncmp(*format, compare_to, n)) { + // TODO(pandas-dev): PyErr to differentiate what went wrong + return COMPARISON_ERROR; + } else { + *format += n; + *characters_remaining -= n; + return COMPARISON_SUCCESS; + } + } + return COMPARISON_SUCCESS; +} + +int parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, + int *out_local, int *out_tzoffset, + const char* format, int format_len, + FormatRequirement format_requirement) { + if (len < 0 || format_len < 0) + goto parse_error; + int year_leap = 0; + int i, numdigits; + const char *substr; + int sublen; + NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; + DatetimePartParseResult comparison; + + /* If year-month-day are separated by a valid separator, + * months/days without leading zeroes will be parsed + * (though not iso8601). If the components aren't separated, + * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are + * forbidden here (but parsed as YYMMDD elsewhere). + */ + int has_ymd_sep = 0; + char ymd_sep = '\0'; + char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; + int valid_ymd_sep_len = sizeof(valid_ymd_sep); + + /* hour-minute-second may or may not separated by ':'. If not, then + * each component must be 2 digits. 
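+ * (e.g. "09:30" and "0930" are both accepted, and "9:30" also parses because the ':' separator is present).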
*/ + int has_hms_sep = 0; + int hour_was_2_digits = 0; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + substr = str; + sublen = len; + + /* Skip leading whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + /* Leading '-' sign for negative year */ + if (*substr == '-') { + ++substr; + --sublen; + } + + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE YEAR (4 digits) */ + comparison = compare_format(&format, &format_len, "%Y", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + + out->year = 0; + if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && + isdigit(substr[2]) && isdigit(substr[3])) { + out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + + 10 * (substr[2] - '0') + (substr[3] - '0'); + + substr += 4; + sublen -= 4; + } + + /* Negate the year if necessary */ + if (str[0] == '-') { + out->year = -out->year; + } + /* Check whether it's a leap-year */ + year_leap = is_leapyear(out->year); + + /* Next character must be a separator, start of month, or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + if (format_len) { + goto parse_error; + } + bestunit = NPY_FR_Y; + goto finish; + } + + if (!isdigit(*substr)) { + for (i = 0; i < valid_ymd_sep_len; ++i) { + if (*substr == valid_ymd_sep[i]) { + break; + } + } + if (i == valid_ymd_sep_len) { + goto parse_error; + } + has_ymd_sep = 1; + ymd_sep = valid_ymd_sep[i]; + ++substr; + --sublen; + + comparison = compare_format(&format, &format_len, &ymd_sep, 1, + format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* Cannot have trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } + + /* PARSE THE MONTH */ + comparison = compare_format(&format, &format_len, "%m", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->month = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->month = 10 * out->month + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->month < 1 || out->month > 12) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Month out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be the separator, start of day, or end of string */ + if (sublen == 0) { + bestunit = NPY_FR_M; + /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. 
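+ * (so a bare "202001" is rejected here rather than being read as January 2020).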
*/ + if (!has_ymd_sep) { + goto parse_error; + } + if (format_len) { + goto parse_error; + } + if (out_local != NULL) { + *out_local = 0; + } + goto finish; + } + + if (has_ymd_sep) { + /* Must have separator, but cannot be trailing */ + if (*substr != ymd_sep || sublen == 1) { + goto parse_error; + } + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, &ymd_sep, 1, + format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + /* PARSE THE DAY */ + comparison = compare_format(&format, &format_len, "%d", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->day = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->day = 10 * out->day + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->day < 1 || + out->day > days_per_month_table[year_leap][out->month - 1]) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Day out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be a 'T', ' ', or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + if (format_len) { + goto parse_error; + } + bestunit = NPY_FR_D; + goto finish; + } + + if ((*substr != 'T' && *substr != ' ') || sublen == 1) { + goto parse_error; + } + comparison = compare_format(&format, &format_len, substr, 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + ++substr; + --sublen; + + /* PARSE THE HOURS */ + comparison = compare_format(&format, &format_len, "%H", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->hour = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional */ + if (isdigit(*substr)) { + hour_was_2_digits = 1; + out->hour = 10 * out->hour + (*substr - '0'); + ++substr; + --sublen; + if (out->hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Hours out of range in datetime string \"%s\"", + str); + } + goto error; + } + } + + /* Next character must be a ':' or the end of the string */ + if (sublen == 0) { + if (!hour_was_2_digits) { + goto parse_error; + } + if (format_len) { + goto parse_error; + } + bestunit = NPY_FR_h; + goto finish; + } + + if (*substr == ':') { + has_hms_sep = 1; + ++substr; + --sublen; + /* Cannot have a trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + comparison = compare_format(&format, &format_len, ":", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } else if (!isdigit(*substr)) { + if (!hour_was_2_digits) { + goto parse_error; + } + goto parse_timezone; + } + + /* PARSE THE MINUTES */ + comparison = compare_format(&format, &format_len, "%M", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First 
digit required */ + out->min = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->min = 10 * out->min + (*substr - '0'); + ++substr; + --sublen; + if (out->min >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Minutes out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + if (sublen == 0) { + bestunit = NPY_FR_m; + if (format_len) { + goto parse_error; + } + goto finish; + } + + /* If we make it through this condition block, then the next + * character is a digit. */ + if (has_hms_sep && *substr == ':') { + comparison = compare_format(&format, &format_len, ":", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + ++substr; + --sublen; + /* Cannot have a trailing ':' */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } else if (!has_hms_sep && isdigit(*substr)) { + } else { + goto parse_timezone; + } + + /* PARSE THE SECONDS */ + comparison = compare_format(&format, &format_len, "%S", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* First digit required */ + out->sec = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->sec = 10 * out->sec + (*substr - '0'); + ++substr; + --sublen; + if (out->sec >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Seconds out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + /* Next character may be a '.' 
indicating fractional seconds */ + if (sublen > 0 && *substr == '.') { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, ".", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } else { + bestunit = NPY_FR_s; + goto parse_timezone; + } + + /* PARSE THE MICROSECONDS (0 to 6 digits) */ + comparison = compare_format(&format, &format_len, "%f", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->us *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->us += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_us; + } else { + bestunit = NPY_FR_ms; + } + goto parse_timezone; + } + + /* PARSE THE PICOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->ps *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->ps += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_ps; + } else { + bestunit = NPY_FR_ns; + } + goto parse_timezone; + } + + /* PARSE THE ATTOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->as *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->as += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (numdigits > 3) { + bestunit = NPY_FR_as; + } else { + bestunit = NPY_FR_fs; + } + +parse_timezone: + /* trim any whitespace between time/timezone */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + if (sublen == 0) { + // Unlike NumPy, treating no time zone as naive + if (format_len > 0) { + goto parse_error; + } + goto finish; + } + + /* UTC specifier */ + if (*substr == 'Z') { + comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* "Z" should be equivalent to tz offset "+00:00" */ + if (out_local != NULL) { + *out_local = 1; + } + + if (out_tzoffset != NULL) { + *out_tzoffset = 0; + } + + if (sublen == 1) { + if (format_len > 0) { + goto parse_error; + } + goto finish; + } else { + ++substr; + --sublen; + } + } else if (*substr == '-' || *substr == '+') { + comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + /* Time zone offset */ + int offset_neg = 0, offset_hour = 0, offset_minute = 0; + + /* + * Since "local" means local with respect to the current + * machine, we say this is non-local. 
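+ * Offsets like "+0530" and "+05:30" both yield an out_tzoffset of 330 minutes.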
+ */ + + if (*substr == '-') { + offset_neg = 1; + } + ++substr; + --sublen; + + /* The hours offset */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone hours offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_hour = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + + /* The minutes offset is optional */ + if (sublen > 0) { + /* Optional ':' */ + if (*substr == ':') { + ++substr; + --sublen; + } + + /* The minutes offset (at the end of the string) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_minute >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone minutes offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_minute = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + } + + /* Apply the time zone offset */ + if (offset_neg) { + offset_hour = -offset_hour; + offset_minute = -offset_minute; + } + if (out_local != NULL) { + *out_local = 1; + // Unlike NumPy, do not change internal value to local time + *out_tzoffset = 60 * offset_hour + offset_minute; + } + } + + /* Skip trailing whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } + } + + if ((sublen != 0) || (format_len != 0)) { + goto parse_error; + } + +finish: + if (out_bestunit != NULL) { + *out_bestunit = bestunit; + } + return 0; + +parse_error: + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Error parsing datetime string \"%s\" at position %d", str, + (int)(substr - str)); + } + return -1; + +error: + return -1; +} + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) { + int len = 0; + + switch (base) { + /* Generic units can only be used to represent NaT */ + /* return 4;*/ + case NPY_FR_as: + len += 3; /* "###" */ + case NPY_FR_fs: + len += 3; /* "###" */ + case NPY_FR_ps: + len += 3; /* "###" */ + case NPY_FR_ns: + len += 3; /* "###" */ + case NPY_FR_us: + len += 3; /* "###" */ + case NPY_FR_ms: + len += 4; /* ".###" */ + case NPY_FR_s: + len += 3; /* ":##" */ + case NPY_FR_m: + len += 3; /* ":##" */ + case NPY_FR_h: + len += 3; /* "T##" */ + case NPY_FR_D: + case NPY_FR_W: + len += 3; /* "-##" */ + case NPY_FR_M: + len += 3; /* "-##" */ + case NPY_FR_Y: + len += 21; /* 64-bit year */ + break; + default: + len += 3; /* handle the now defunct NPY_FR_B */ + break; + } + + if (base >= NPY_FR_h) { + if (local) { + len += 5; /* "+####" or "-####" */ + } else { + len += 1; /* "Z" */ + } + } + + len += 1; /* NULL terminator */ + + return len; +} + + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string using timezone Z (UTC). If the string fits in + * the space exactly, it leaves out the NULL terminator and returns success. 
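+ * (a typical result for base NPY_FR_s with utc set looks like "2023-05-22T16:23:51Z").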
+ * + * The differences from ISO 8601 are the 'NaT' string, and + * the number of year digits is >= 4 instead of strictly 4. + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. + * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + int utc, NPY_DATETIMEUNIT base) { + char *substr = outstr; + int sublen = outlen; + int tmplen; + + /* + * Print weeks with the same precision as days. + * + * TODO: Could print weeks with YYYY-Www format if the week + * epoch is a Monday. + */ + if (base == NPY_FR_W) { + base = NPY_FR_D; + } + +/* YEAR */ +/* + * Can't use PyOS_snprintf, because it always produces a '\0' + * character at the end, and NumPy string types are permitted + * to have data all the way to the end of the buffer. + */ +#ifdef _WIN32 + tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#else + tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#endif // _WIN32 + /* If it ran out of space or there isn't space for the NULL terminator */ + if (tmplen < 0 || tmplen > sublen) { + goto string_too_short; + } + substr += tmplen; + sublen -= tmplen; + + /* Stop if the unit is years */ + if (base == NPY_FR_Y) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* MONTH */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->month / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->month % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is months */ + if (base == NPY_FR_M) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* DAY */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '-'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->day / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->day % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is days */ + if (base == NPY_FR_D) { + if (sublen > 0) { + *substr = '\0'; + } + return 0; + } + + /* HOUR */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'T'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->hour / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->hour % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is hours */ + if (base == NPY_FR_h) { + goto add_time_zone; + } + + /* MINUTE */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->min / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->min % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is minutes */ + if (base == NPY_FR_m) { + goto add_time_zone; + } + + /* SECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = ':'; + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->sec / 10) + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->sec % 10) + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is seconds */ + if (base == NPY_FR_s) { + goto add_time_zone; + } + + /* MILLISECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = '.'; + if (sublen < 2) { + 
goto string_too_short; + } + substr[1] = (char)((dts->us / 100000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->us / 10000) % 10 + '0'); + if (sublen < 4) { + goto string_too_short; + } + substr[3] = (char)((dts->us / 1000) % 10 + '0'); + substr += 4; + sublen -= 4; + + /* Stop if the unit is milliseconds */ + if (base == NPY_FR_ms) { + goto add_time_zone; + } + + /* MICROSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->us / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->us % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is microseconds */ + if (base == NPY_FR_us) { + goto add_time_zone; + } + + /* NANOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->ps / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is nanoseconds */ + if (base == NPY_FR_ns) { + goto add_time_zone; + } + + /* PICOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->ps / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->ps % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is picoseconds */ + if (base == NPY_FR_ps) { + goto add_time_zone; + } + + /* FEMTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100000) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10000) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)((dts->as / 1000) % 10 + '0'); + substr += 3; + sublen -= 3; + + /* Stop if the unit is femtoseconds */ + if (base == NPY_FR_fs) { + goto add_time_zone; + } + + /* ATTOSECOND */ + if (sublen < 1) { + goto string_too_short; + } + substr[0] = (char)((dts->as / 100) % 10 + '0'); + if (sublen < 2) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10) % 10 + '0'); + if (sublen < 3) { + goto string_too_short; + } + substr[2] = (char)(dts->as % 10 + '0'); + substr += 3; + sublen -= 3; + +add_time_zone: + /* UTC "Zulu" time */ + if (utc) { + if (sublen < 1) { + goto string_too_short; + } + substr[0] = 'Z'; + substr += 1; + sublen -= 1; + } + /* Add a NULL terminator, and return */ + if (sublen > 0) { + substr[0] = '\0'; + } + + return 0; + +string_too_short: + PyErr_Format(PyExc_RuntimeError, + "The string provided for NumPy ISO datetime formatting " + "was too short, with length %d", + outlen); + return -1; +} + + +int make_iso_8601_timedelta(pandas_timedeltastruct *tds, + char *outstr, size_t *outlen) { + *outlen = 0; + *outlen += snprintf(outstr, 60, // NOLINT + "P%" NPY_INT64_FMT + "DT%" NPY_INT32_FMT + "H%" NPY_INT32_FMT + "M%" NPY_INT32_FMT, + tds->days, tds->hrs, tds->min, tds->sec); + outstr += *outlen; + + if (tds->ns != 0) { + *outlen += snprintf(outstr, 12, // NOLINT + ".%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "S", tds->ms, tds->us, tds->ns); + } else if (tds->us != 0) { + *outlen += snprintf(outstr, 9, // NOLINT + ".%03" NPY_INT32_FMT + "%03" NPY_INT32_FMT + "S", 
tds->ms, tds->us); + } else if (tds->ms != 0) { + *outlen += snprintf(outstr, 6, // NOLINT + ".%03" NPY_INT32_FMT "S", tds->ms); + } else { + *outlen += snprintf(outstr, 2, // NOLINT + "%s", "S"); + } + + return 0; +} diff --git a/pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c b/pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c new file mode 100644 index 0000000000000..9ec12cb242728 --- /dev/null +++ b/pandas/_libs/src/vendored/ujson/lib/ultrajsondec.c @@ -0,0 +1,1208 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "pandas/vendored/ujson/lib/ultrajson.h" + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif +#ifndef NULL +#define NULL 0 +#endif + +struct DecoderState { + char *start; + char *end; + wchar_t *escStart; + wchar_t *escEnd; + int escHeap; + int lastType; + JSUINT32 objDepth; + void *prv; + JSONObjectDecoder *dec; +}; + +JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds); +typedef JSOBJ (*PFN_DECODER)(struct DecoderState *ds); + +static JSOBJ SetError(struct DecoderState *ds, int offset, + const char *message) { + ds->dec->errorOffset = ds->start + offset; + ds->dec->errorStr = (char *)message; + return NULL; +} + +double createDouble(double intNeg, double intValue, double frcValue, + int frcDecimalCount) { + static const double g_pow10[] = {1.0, + 0.1, + 0.01, + 0.001, + 0.0001, + 0.00001, + 0.000001, + 0.0000001, + 0.00000001, + 0.000000001, + 0.0000000001, + 0.00000000001, + 0.000000000001, + 0.0000000000001, + 0.00000000000001, + 0.000000000000001}; + return (intValue + (frcValue * g_pow10[frcDecimalCount])) * intNeg; +} + +JSOBJ FASTCALL_MSVC decodePreciseFloat(struct DecoderState *ds) { + char *end; + double value; + errno = 0; + + value = strtod(ds->start, &end); + + if (errno == ERANGE) { + return SetError(ds, -1, "Range error when decoding numeric as double"); + } + + ds->start = end; + return ds->dec->newDouble(ds->prv, value); +} + +JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { + int intNeg = 1; + JSUINT64 intValue; + JSUINT64 prevIntValue; + int chr; + int decimalCount = 0; + double frcValue = 0.0; + double expNeg; + double expValue; + char *offset = ds->start; + + JSUINT64 overflowLimit = LLONG_MAX; + + if (*(offset) == 'I') { + goto DECODE_INF; + } else if (*(offset) == 'N') { + goto DECODE_NAN; + } else if (*(offset) == '-') { + offset++; + intNeg = -1; + overflowLimit = LLONG_MIN; + if (*(offset) == 'I') { + goto DECODE_INF; + } + } + + // Scan integer part + intValue = 0; + + while (1) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + // PERF: Don't do 64-bit arithmetic here unless we have to + prevIntValue = intValue; + intValue = intValue * 10ULL + (JSLONG) (chr - 48); + + if (intNeg == 1 && prevIntValue > intValue) { + return SetError(ds, -1, "Value is too big!"); + } else if (intNeg == -1 && intValue > overflowLimit) { + return SetError(ds, -1, overflowLimit == LLONG_MAX ? + "Value is too big!" 
: "Value is too small"); + } + + offset++; + break; + } + case '.': { + offset++; + goto DECODE_FRACTION; + break; + } + case 'e': + case 'E': { + offset++; + goto DECODE_EXPONENT; + break; + } + + default: { + goto BREAK_INT_LOOP; + break; + } + } + } + +BREAK_INT_LOOP: + + ds->lastType = JT_INT; + ds->start = offset; + + if (intNeg == 1 && (intValue & 0x8000000000000000ULL) != 0) + return ds->dec->newUnsignedLong(ds->prv, intValue); + else if ((intValue >> 31)) + return ds->dec->newLong(ds->prv, (JSINT64)(intValue * (JSINT64)intNeg)); + else + return ds->dec->newInt(ds->prv, (JSINT32)(intValue * intNeg)); + +DECODE_FRACTION: + + if (ds->dec->preciseFloat) { + return decodePreciseFloat(ds); + } + + // Scan fraction part + frcValue = 0.0; + for (;;) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (decimalCount < JSON_DOUBLE_MAX_DECIMALS) { + frcValue = frcValue * 10.0 + (double)(chr - 48); + decimalCount++; + } + offset++; + break; + } + case 'e': + case 'E': { + offset++; + goto DECODE_EXPONENT; + break; + } + default: { goto BREAK_FRC_LOOP; } + } + } + +BREAK_FRC_LOOP: + // FIXME: Check for arithmetic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + return ds->dec->newDouble( + ds->prv, + createDouble((double)intNeg, (double)intValue, frcValue, decimalCount)); + +DECODE_EXPONENT: + if (ds->dec->preciseFloat) { + return decodePreciseFloat(ds); + } + + expNeg = 1.0; + + if (*(offset) == '-') { + expNeg = -1.0; + offset++; + } else if (*(offset) == '+') { + expNeg = +1.0; + offset++; + } + + expValue = 0.0; + + for (;;) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + expValue = expValue * 10.0 + (double)(chr - 48); + offset++; + break; + } + default: { goto BREAK_EXP_LOOP; } + } + } + +DECODE_NAN: + offset++; + if (*(offset++) != 'a') goto SET_NAN_ERROR; + if (*(offset++) != 'N') goto SET_NAN_ERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); + +SET_NAN_ERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'NaN'"); + +DECODE_INF: + offset++; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'f') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 't') goto SET_INF_ERROR; + if (*(offset++) != 'y') goto SET_INF_ERROR; + + ds->start = offset; + + if (intNeg == 1) { + ds->lastType = JT_POS_INF; + return ds->dec->newPosInf(ds->prv); + } else { + ds->lastType = JT_NEG_INF; + return ds->dec->newNegInf(ds->prv); + } + +SET_INF_ERROR: + if (intNeg == 1) { + const char *msg = "Unexpected character found when decoding 'Infinity'"; + return SetError(ds, -1, msg); + } else { + const char *msg = "Unexpected character found when decoding '-Infinity'"; + return SetError(ds, -1, msg); + } + + +BREAK_EXP_LOOP: + // FIXME: Check for arithmetic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + return ds->dec->newDouble( + ds->prv, + createDouble((double)intNeg, (double)intValue, frcValue, decimalCount) * + pow(10.0, expValue * expNeg)); +} + +JSOBJ FASTCALL_MSVC decode_true(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'r') goto SETERROR; + if (*(offset++) != 
'u') goto SETERROR; + if (*(offset++) != 'e') goto SETERROR; + + ds->lastType = JT_TRUE; + ds->start = offset; + return ds->dec->newTrue(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'true'"); +} + +JSOBJ FASTCALL_MSVC decode_false(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'a') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + if (*(offset++) != 's') goto SETERROR; + if (*(offset++) != 'e') goto SETERROR; + + ds->lastType = JT_FALSE; + ds->start = offset; + return ds->dec->newFalse(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'false'"); +} + +JSOBJ FASTCALL_MSVC decode_null(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'u') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'null'"); +} + +void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds) { + char *offset; + + for (offset = ds->start; (ds->end - offset) > 0; offset++) { + switch (*offset) { + case ' ': + case '\t': + case '\r': + case '\n': + break; + + default: + ds->start = offset; + return; + } + } + + if (offset == ds->end) { + ds->start = ds->end; + } +} + +enum DECODESTRINGSTATE { + DS_ISNULL = 0x32, + DS_ISQUOTE, + DS_ISESCAPE, + DS_UTFLENERROR, +}; + +static const JSUINT8 g_decoderLookup[256] = { + /* 0x00 */ DS_ISNULL, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x10 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x20 */ 1, + 1, + DS_ISQUOTE, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x30 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x40 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x50 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + DS_ISESCAPE, + 1, + 1, + 1, + /* 0x60 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x70 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x80 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x90 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xa0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xb0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xc0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xd0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xe0 */ 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + /* 0xf0 */ 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, +}; + +JSOBJ FASTCALL_MSVC decode_string(struct DecoderState *ds) { + JSUTF16 sur[2] = {0}; + int iSur = 0; + int index; + wchar_t *escOffset; + wchar_t *escStart; + size_t escLen = (ds->escEnd - ds->escStart); + JSUINT8 *inputOffset; + JSUINT8 oct; + JSUTF32 ucs; + ds->lastType = JT_INVALID; + ds->start++; + + if ((size_t)(ds->end - ds->start) > escLen) { + size_t newSize = 
(ds->end - ds->start); + + if (ds->escHeap) { + if (newSize > (SIZE_MAX / sizeof(wchar_t))) { + return SetError(ds, -1, "Could not reserve memory block"); + } + escStart = (wchar_t *)ds->dec->realloc(ds->escStart, + newSize * sizeof(wchar_t)); + if (!escStart) { + ds->dec->free(ds->escStart); + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escStart = escStart; + } else { + wchar_t *oldStart = ds->escStart; + if (newSize > (SIZE_MAX / sizeof(wchar_t))) { + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escStart = + (wchar_t *)ds->dec->malloc(newSize * sizeof(wchar_t)); + if (!ds->escStart) { + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escHeap = 1; + memcpy(ds->escStart, oldStart, escLen * sizeof(wchar_t)); + } + + ds->escEnd = ds->escStart + newSize; + } + + escOffset = ds->escStart; + inputOffset = (JSUINT8 *)ds->start; + + for (;;) { + switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) { + case DS_ISNULL: { + return SetError(ds, -1, + "Unmatched ''\"' when when decoding 'string'"); + } + case DS_ISQUOTE: { + ds->lastType = JT_UTF8; + inputOffset++; + ds->start += ((char *)inputOffset - (ds->start)); + return ds->dec->newString(ds->prv, ds->escStart, escOffset); + } + case DS_UTFLENERROR: { + return SetError( + ds, -1, + "Invalid UTF-8 sequence length when decoding 'string'"); + } + case DS_ISESCAPE: + inputOffset++; + switch (*inputOffset) { + case '\\': + *(escOffset++) = L'\\'; + inputOffset++; + continue; + case '\"': + *(escOffset++) = L'\"'; + inputOffset++; + continue; + case '/': + *(escOffset++) = L'/'; + inputOffset++; + continue; + case 'b': + *(escOffset++) = L'\b'; + inputOffset++; + continue; + case 'f': + *(escOffset++) = L'\f'; + inputOffset++; + continue; + case 'n': + *(escOffset++) = L'\n'; + inputOffset++; + continue; + case 'r': + *(escOffset++) = L'\r'; + inputOffset++; + continue; + case 't': + *(escOffset++) = L'\t'; + inputOffset++; + continue; + + case 'u': { + int index; + inputOffset++; + + for (index = 0; index < 4; index++) { + switch (*inputOffset) { + case '\0': + return SetError(ds, -1, + "Unterminated unicode " + "escape sequence when " + "decoding 'string'"); + default: + return SetError(ds, -1, + "Unexpected character in " + "unicode escape sequence " + "when decoding 'string'"); + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + sur[iSur] = (sur[iSur] << 4) + + (JSUTF16)(*inputOffset - '0'); + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + sur[iSur] = (sur[iSur] << 4) + 10 + + (JSUTF16)(*inputOffset - 'a'); + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + sur[iSur] = (sur[iSur] << 4) + 10 + + (JSUTF16)(*inputOffset - 'A'); + break; + } + + inputOffset++; + } + + if (iSur == 0) { + if ((sur[iSur] & 0xfc00) == 0xd800) { + // First of a surrogate pair, continue parsing + iSur++; + break; + } + (*escOffset++) = (wchar_t)sur[iSur]; + iSur = 0; + } else { + // Decode pair + if ((sur[1] & 0xfc00) != 0xdc00) { + return SetError(ds, -1, + "Unpaired high surrogate when " + "decoding 'string'"); + } +#if WCHAR_MAX == 0xffff + (*escOffset++) = (wchar_t)sur[0]; + (*escOffset++) = (wchar_t)sur[1]; +#else + (*escOffset++) = + (wchar_t)0x10000 + + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); +#endif + iSur = 0; + } + break; + } + + case '\0': + return SetError(ds, -1, + "Unterminated escape sequence when " + "decoding 'string'"); + default: + return 
SetError(ds, -1, + "Unrecognized escape sequence when " + "decoding 'string'"); + } + break; + + case 1: { + *(escOffset++) = (wchar_t)(*inputOffset++); + break; + } + + case 2: { + ucs = (*inputOffset++) & 0x1f; + ucs <<= 6; + if (((*inputOffset) & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + ucs |= (*inputOffset++) & 0x3f; + if (ucs < 0x80) + return SetError(ds, -1, + "Overlong 2 byte UTF-8 sequence detected " + "when decoding 'string'"); + *(escOffset++) = (wchar_t)ucs; + break; + } + + case 3: { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x0f; + + for (index = 0; index < 2; index++) { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x800) + return SetError(ds, -1, + "Overlong 3 byte UTF-8 sequence detected " + "when encoding string"); + *(escOffset++) = (wchar_t)ucs; + break; + } + + case 4: { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x07; + + for (index = 0; index < 3; index++) { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x10000) + return SetError(ds, -1, + "Overlong 4 byte UTF-8 sequence detected " + "when decoding 'string'"); + +#if WCHAR_MAX == 0xffff + if (ucs >= 0x10000) { + ucs -= 0x10000; + *(escOffset++) = (wchar_t)(ucs >> 10) + 0xd800; + *(escOffset++) = (wchar_t)(ucs & 0x3ff) + 0xdc00; + } else { + *(escOffset++) = (wchar_t)ucs; + } +#else + *(escOffset++) = (wchar_t)ucs; +#endif + break; + } + } + } +} + +JSOBJ FASTCALL_MSVC decode_array(struct DecoderState *ds) { + JSOBJ itemValue; + JSOBJ newObj; + int len; + ds->objDepth++; + if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { + return SetError(ds, -1, "Reached object decoding depth limit"); + } + + newObj = ds->dec->newArray(ds->prv, ds->dec); + len = 0; + + ds->lastType = JT_INVALID; + ds->start++; + + for (;;) { + SkipWhitespace(ds); + + if ((*ds->start) == ']') { + ds->objDepth--; + if (len == 0) { + ds->start++; + return ds->dec->endArray(ds->prv, newObj); + } + + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError( + ds, -1, + "Unexpected character found when decoding array value (1)"); + } + + itemValue = decode_any(ds); + + if (itemValue == NULL) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } + + if (!ds->dec->arrayAddItem(ds->prv, newObj, itemValue)) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } + + SkipWhitespace(ds); + + switch (*(ds->start++)) { + case ']': { + ds->objDepth--; + return ds->dec->endArray(ds->prv, newObj); + } + case ',': + break; + + default: + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError( + ds, -1, + "Unexpected character found when decoding array value (2)"); + } + + len++; + } +} + +JSOBJ FASTCALL_MSVC decode_object(struct DecoderState *ds) { + JSOBJ itemName; + JSOBJ itemValue; + JSOBJ newObj; + + ds->objDepth++; + if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { + return SetError(ds, -1, "Reached object decoding depth limit"); + } + + newObj = ds->dec->newObject(ds->prv, ds->dec); + + ds->start++; + + for (;;) { + SkipWhitespace(ds); + + if ((*ds->start) == '}') { + ds->objDepth--; + ds->start++; + return ds->dec->endObject(ds->prv, newObj); + } + + ds->lastType = JT_INVALID; + itemName = decode_any(ds); + + if 
(itemName == NULL) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } + + if (ds->lastType != JT_UTF8) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return SetError( + ds, -1, + "Key name of object must be 'string' when decoding 'object'"); + } + + SkipWhitespace(ds); + + if (*(ds->start++) != ':') { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return SetError(ds, -1, "No ':' found when decoding object value"); + } + + SkipWhitespace(ds); + + itemValue = decode_any(ds); + + if (itemValue == NULL) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return NULL; + } + + if (!ds->dec->objectAddKey(ds->prv, newObj, itemName, itemValue)) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + ds->dec->releaseObject(ds->prv, itemValue, ds->dec); + return NULL; + } + + SkipWhitespace(ds); + + switch (*(ds->start++)) { + case '}': { + ds->objDepth--; + return ds->dec->endObject(ds->prv, newObj); + } + case ',': + break; + + default: + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError( + ds, -1, + "Unexpected character found when decoding object value"); + } + } +} + +JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) { + for (;;) { + switch (*ds->start) { + case '\"': + return decode_string(ds); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'I': + case 'N': + case '-': + return decode_numeric(ds); + + case '[': + return decode_array(ds); + case '{': + return decode_object(ds); + case 't': + return decode_true(ds); + case 'f': + return decode_false(ds); + case 'n': + return decode_null(ds); + + case ' ': + case '\t': + case '\r': + case '\n': + // White space + ds->start++; + break; + + default: + return SetError(ds, -1, "Expected object or value"); + } + } +} + +JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, + size_t cbBuffer) { + /* + FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode + escaping doesn't run into the wall each time */ + char *locale; + struct DecoderState ds; + wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))]; + JSOBJ ret; + + ds.start = (char *)buffer; + ds.end = ds.start + cbBuffer; + + ds.escStart = escBuffer; + ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t)); + ds.escHeap = 0; + ds.prv = dec->prv; + ds.dec = dec; + ds.dec->errorStr = NULL; + ds.dec->errorOffset = NULL; + ds.objDepth = 0; + + ds.dec = dec; + + locale = setlocale(LC_NUMERIC, NULL); + if (!locale) { + return SetError(&ds, -1, "setlocale call failed"); + } + + if (strcmp(locale, "C")) { + size_t len = strlen(locale) + 1; + char *saved_locale = malloc(len); + if (saved_locale == NULL) { + return SetError(&ds, -1, "Could not reserve memory block"); + } + memcpy(saved_locale, locale, len); + setlocale(LC_NUMERIC, "C"); + ret = decode_any(&ds); + setlocale(LC_NUMERIC, saved_locale); + free(saved_locale); + } else { + ret = decode_any(&ds); + } + + if (ds.escHeap) { + dec->free(ds.escStart); + } + + SkipWhitespace(&ds); + + if (ds.start != ds.end && ret) { + dec->releaseObject(ds.prv, ret, ds.dec); + return SetError(&ds, -1, "Trailing data"); + } + + return ret; +} diff --git a/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c 
b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c new file mode 100644 index 0000000000000..726676799af65 --- /dev/null +++ b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c @@ -0,0 +1,1207 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "pandas/vendored/ujson/lib/ultrajson.h" + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +/* +Worst cases being: + +Control characters (ASCII < 32) +0x00 (1 byte) input => \u0000 output (6 bytes) +1 * 6 => 6 (6 bytes required) + +or UTF-16 surrogate pairs +4 bytes input in UTF-8 => \uXXXX\uYYYY (12 bytes). + +4 * 6 => 24 bytes (12 bytes required) + +The extra 2 bytes are for the quotes around the string + +*/ +#define RESERVE_STRING(_len) (2 + ((_len)*6)) + +static const double g_pow10[] = {1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000}; +static const char g_hexChars[] = "0123456789abcdef"; +static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\\\/"; + +/* +FIXME: While this is fine dandy and working it's a magic value mess which +probably only the author understands. 
+Needs a cleanup and more documentation */ + +/* +Table for pure ascii output escaping all characters above 127 to \uXXXX */ +static const JSUINT8 g_asciiOutputTable[256] = { + /* 0x00 */ 0, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 10, + 12, + 14, + 30, + 16, + 18, + 30, + 30, + /* 0x10 */ 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + /* 0x20 */ 1, + 1, + 20, + 1, + 1, + 1, + 29, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 24, + /* 0x30 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 29, + 1, + 29, + 1, + /* 0x40 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x50 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 22, + 1, + 1, + 1, + /* 0x60 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x70 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x80 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x90 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xa0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xb0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xc0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xd0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xe0 */ 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + /* 0xf0 */ 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 5, + 5, + 5, + 5, + 6, + 6, + 1, + 1}; + +static void SetError(JSOBJ obj, JSONObjectEncoder *enc, const char *message) { + enc->errorMsg = message; + enc->errorObj = obj; +} + +/* +FIXME: Keep track of how big these get across several encoder calls and try to +make an estimate +That way we won't run our head into the wall each call */ +void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded) { + size_t curSize = enc->end - enc->start; + size_t newSize = curSize * 2; + size_t offset = enc->offset - enc->start; + + while (newSize < curSize + cbNeeded) { + newSize *= 2; + } + + if (enc->heap) { + enc->start = (char *)enc->realloc(enc->start, newSize); + if (!enc->start) { + SetError(NULL, enc, "Could not reserve memory block"); + return; + } + } else { + char *oldStart = enc->start; + enc->heap = 1; + enc->start = (char *)enc->malloc(newSize); + if (!enc->start) { + SetError(NULL, enc, "Could not reserve memory block"); + return; + } + memcpy(enc->start, oldStart, offset); + } + enc->offset = enc->start + offset; + enc->end = enc->start + newSize; +} + +INLINE_PREFIX void FASTCALL_MSVC +Buffer_AppendShortHexUnchecked(char *outputOffset, unsigned short value) { + *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12]; + *(outputOffset++) = g_hexChars[(value & 0x0f00) >> 8]; + *(outputOffset++) = g_hexChars[(value & 0x00f0) >> 4]; + *(outputOffset++) = g_hexChars[(value & 0x000f) >> 0]; +} + +int Buffer_EscapeStringUnvalidated(JSONObjectEncoder *enc, const char *io, + const char *end) { + char *of = (char *)enc->offset; + + for (;;) { + switch (*io) { + case 0x00: { + if (io < end) { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + break; + } else { + enc->offset += (of - enc->offset); + return TRUE; + } + } + case '\"': + (*of++) = '\\'; + (*of++) = '\"'; + break; + case '\\': + (*of++) = '\\'; + (*of++) = '\\'; + break; + case '/': + 
(*of++) = '\\'; + (*of++) = '/'; + break; + case '\b': + (*of++) = '\\'; + (*of++) = 'b'; + break; + case '\f': + (*of++) = '\\'; + (*of++) = 'f'; + break; + case '\n': + (*of++) = '\\'; + (*of++) = 'n'; + break; + case '\r': + (*of++) = '\\'; + (*of++) = 'r'; + break; + case '\t': + (*of++) = '\\'; + (*of++) = 't'; + break; + + case 0x26: // '/' + case 0x3c: // '<' + case 0x3e: // '>' + { + if (enc->encodeHTMLChars) { + // Fall through to \u00XX case below. + } else { + // Same as default case below. + (*of++) = (*io); + break; + } + } + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[(unsigned char)(((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[(unsigned char)((*io) & 0x0f)]; + break; + } + default: + (*of++) = (*io); + break; + } + io++; + } +} + +int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc, + const char *io, const char *end) { + JSUTF32 ucs; + char *of = (char *)enc->offset; + + for (;;) { + JSUINT8 utflen = g_asciiOutputTable[(unsigned char)*io]; + + switch (utflen) { + case 0: { + if (io < end) { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + io++; + continue; + } else { + enc->offset += (of - enc->offset); + return TRUE; + } + } + + case 1: { + *(of++) = (*io++); + continue; + } + + case 2: { + JSUTF32 in; + JSUTF16 in16; + + if (end - io < 1) { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in16, io, sizeof(JSUTF16)); + in = (JSUTF32)in16; + +#ifdef __LITTLE_ENDIAN__ + ucs = ((in & 0x1f) << 6) | ((in >> 8) & 0x3f); +#else + ucs = ((in & 0x1f00) >> 2) | (in & 0x3f); +#endif + + if (ucs < 0x80) { + enc->offset += (of - enc->offset); + SetError(obj, enc, + "Overlong 2 byte UTF-8 sequence detected when " + "encoding string"); + return FALSE; + } + + io += 2; + break; + } + + case 3: { + JSUTF32 in; + JSUTF16 in16; + JSUINT8 in8; + + if (end - io < 2) { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in16, io, sizeof(JSUTF16)); + memcpy(&in8, io + 2, sizeof(JSUINT8)); +#ifdef __LITTLE_ENDIAN__ + in = (JSUTF32)in16; + in |= in8 << 16; + ucs = ((in & 0x0f) << 12) | ((in & 0x3f00) >> 2) | + ((in & 0x3f0000) >> 16); +#else + in = in16 << 8; + in |= in8; + ucs = + ((in & 0x0f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); +#endif + + if (ucs < 0x800) { + enc->offset += (of - enc->offset); + SetError(obj, enc, + "Overlong 3 byte UTF-8 sequence detected when " + "encoding string"); + return FALSE; + } + + io += 3; + break; + } + case 4: { + JSUTF32 in; + + if (end - io < 3) { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in, io, sizeof(JSUTF32)); +#ifdef __LITTLE_ENDIAN__ + ucs = ((in & 0x07) << 18) | ((in & 0x3f00) << 4) | + ((in & 0x3f0000) >> 10) | ((in & 0x3f000000) >> 24); +#else + ucs = ((in & 0x07000000) >> 6) | ((in & 0x3f0000) >> 4) | + ((in & 0x3f00) >> 2) | (in & 0x3f); +#endif + if (ucs < 0x10000) { + enc->offset += (of - 
enc->offset); + SetError(obj, enc, + "Overlong 4 byte UTF-8 sequence detected when " + "encoding string"); + return FALSE; + } + + io += 4; + break; + } + + case 5: + case 6: { + enc->offset += (of - enc->offset); + SetError( + obj, enc, + "Unsupported UTF-8 sequence length when encoding string"); + return FALSE; + } + + case 29: { + if (enc->encodeHTMLChars) { + // Fall through to \u00XX case 30 below. + } else { + // Same as case 1 above. + *(of++) = (*io++); + continue; + } + } + + case 30: { + // \uXXXX encode + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[(unsigned char)(((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[(unsigned char)((*io) & 0x0f)]; + io++; + continue; + } + case 10: + case 12: + case 14: + case 16: + case 18: + case 20: + case 22: + case 24: { + *(of++) = *((char *)(g_escapeChars + utflen + 0)); + *(of++) = *((char *)(g_escapeChars + utflen + 1)); + io++; + continue; + } + // This can never happen, it's here to make L4 VC++ happy + default: { + ucs = 0; + break; + } + } + + /* + If the character is a UTF8 sequence of length > 1 we end up here */ + if (ucs >= 0x10000) { + ucs -= 0x10000; + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked( + of, (unsigned short)(ucs >> 10) + 0xd800); + of += 4; + + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked( + of, (unsigned short)(ucs & 0x3ff) + 0xdc00); + of += 4; + } else { + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, (unsigned short)ucs); + of += 4; + } + } +} + +#define Buffer_Reserve(__enc, __len) \ + if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len)) \ + { \ + Buffer_Realloc((__enc), (__len));\ + } \ + +#define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr; + +INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin, + char *end) { + char aux; + while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux; +} + +void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc) { + if (enc->indent > 0) Buffer_AppendCharUnchecked(enc, '\n'); +} + +// This function could be refactored to only accept enc as an argument, +// but this is a straight vendor from ujson source +void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value) { + int i; + if (enc->indent > 0) { + while (value-- > 0) + for (i = 0; i < enc->indent; i++) + Buffer_AppendCharUnchecked(enc, ' '); + } +} + +void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) { + char *wstr; + JSUINT32 uvalue = (value < 0) ? -value : value; + wstr = enc->offset; + + // Conversion. Number is reversed. + do { + *wstr++ = (char)(48 + (uvalue % 10)); + } while (uvalue /= 10); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset, wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) { + char *wstr; + JSUINT64 uvalue = (value < 0) ? -value : value; + + wstr = enc->offset; + // Conversion. Number is reversed. 
+ + do { + *wstr++ = (char)(48 + (uvalue % 10ULL)); + } while (uvalue /= 10ULL); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset, wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, + double value) { + /* if input is beyond the thresholds, revert to exponential */ + const double thres_max = (double)1e16 - 1; + const double thres_min = (double)1e-15; + char precision_str[20]; + int count; + double diff = 0.0; + char *str = enc->offset; + char *wstr = str; + unsigned long long whole; + double tmp; + unsigned long long frac; + int neg; + double pow10; + + if (value == HUGE_VAL || value == -HUGE_VAL) { + SetError(obj, enc, "Invalid Inf value when encoding double"); + return FALSE; + } + + if (!(value == value)) { + SetError(obj, enc, "Invalid Nan value when encoding double"); + return FALSE; + } + + /* we'll work in positive values and deal with the + negative sign issue later */ + neg = 0; + if (value < 0) { + neg = 1; + value = -value; + } + + /* + for very large or small numbers switch back to native sprintf for + exponentials. anyone want to write code to replace this? */ + if (value > thres_max || (value != 0.0 && fabs(value) < thres_min)) { + precision_str[0] = '%'; + precision_str[1] = '.'; +#if defined(_WIN32) && defined(_MSC_VER) + sprintf_s(precision_str + 2, sizeof(precision_str) - 2, "%ug", + enc->doublePrecision); + enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str, + neg ? -value : value); +#else + snprintf(precision_str + 2, sizeof(precision_str) - 2, "%ug", + enc->doublePrecision); + enc->offset += snprintf(str, enc->end - enc->offset, precision_str, + neg ? -value : value); +#endif + return TRUE; + } + + pow10 = g_pow10[enc->doublePrecision]; + + whole = (unsigned long long)value; + tmp = (value - whole) * pow10; + frac = (unsigned long long)(tmp); + diff = tmp - frac; + + if (diff > 0.5) { + ++frac; + } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { + /* if halfway, round up if odd, OR + if last digit is 0. That last part is strange */ + ++frac; + } + + // handle rollover, e.g. + // case 0.99 with prec 1 is 1.0 and case 0.95 with prec is 1.0 as well + if (frac >= pow10) { + frac = 0; + ++whole; + } + + if (enc->doublePrecision == 0) { + diff = value - whole; + + if (diff > 0.5) { + /* greater than 0.5, round up, e.g. 1.6 -> 2 */ + ++whole; + } else if (diff == 0.5 && (whole & 1)) { + /* exactly 0.5 and ODD, then round up */ + /* 1.5 -> 2, but 2.5 -> 2 */ + ++whole; + } + + // vvvvvvvvvvvvvvvvvvv Diff from modp_dto2 + } else if (frac) { + count = enc->doublePrecision; + // now do fractional part, as an unsigned number + // we know it is not 0 but we can have leading zeros, these + // should be removed + while (!(frac % 10)) { + --count; + frac /= 10; + } + //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2 + + // now do fractional part, as an unsigned number + do { + --count; + *wstr++ = (char)(48 + (frac % 10)); + } while (frac /= 10); + // add extra 0s + while (count-- > 0) { + *wstr++ = '0'; + } + // add decimal + *wstr++ = '.'; + } else { + *wstr++ = '0'; + *wstr++ = '.'; + } + + // Do whole part. Take care of sign + // conversion. Number is reversed. 
+ do { + *wstr++ = (char)(48 + (whole % 10)); + } while (whole /= 10); + + if (neg) { + *wstr++ = '-'; + } + strreverse(str, wstr - 1); + enc->offset += (wstr - (enc->offset)); + + return TRUE; +} + +/* +FIXME: +Handle integration functions returning NULL here */ + +/* +FIXME: +Perhaps implement recursion detection */ + +void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, + size_t cbName) { + const char *value; + char *objName; + int count; + JSOBJ iterObj; + size_t szlen; + JSONTypeContext tc; + tc.encoder = enc; + + if (enc->level > enc->recursionMax) { + SetError(obj, enc, "Maximum recursion level reached"); + return; + } + + /* + This reservation must hold + + length of _name as encoded worst case + + maxLength of double to string OR maxLength of JSLONG to string + */ + + Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName)); + if (enc->errorMsg) { + return; + } + + if (name) { + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, name, name + cbName)) { + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, name, name + cbName)) { + return; + } + } + + Buffer_AppendCharUnchecked(enc, '\"'); + + Buffer_AppendCharUnchecked(enc, ':'); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(enc, ' '); +#endif + } + + enc->beginTypeContext(obj, &tc); + + switch (tc.type) { + case JT_INVALID: { + return; + } + + case JT_ARRAY: { + count = 0; + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked(enc, '['); + Buffer_AppendIndentNewlineUnchecked(enc); + + while (enc->iterNext(obj, &tc)) { + if (count > 0) { + Buffer_AppendCharUnchecked(enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(buffer, ' '); +#endif + Buffer_AppendIndentNewlineUnchecked(enc); + } + + iterObj = enc->iterGetValue(obj, &tc); + + enc->level++; + Buffer_AppendIndentUnchecked(enc, enc->level); + encode(iterObj, enc, NULL, 0); + count++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendIndentNewlineUnchecked(enc); + Buffer_AppendIndentUnchecked(enc, enc->level); + Buffer_AppendCharUnchecked(enc, ']'); + break; + } + + case JT_OBJECT: { + count = 0; + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked(enc, '{'); + Buffer_AppendIndentNewlineUnchecked(enc); + + while (enc->iterNext(obj, &tc)) { + if (count > 0) { + Buffer_AppendCharUnchecked(enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(enc, ' '); +#endif + Buffer_AppendIndentNewlineUnchecked(enc); + } + + iterObj = enc->iterGetValue(obj, &tc); + objName = enc->iterGetName(obj, &tc, &szlen); + + enc->level++; + Buffer_AppendIndentUnchecked(enc, enc->level); + encode(iterObj, enc, objName, szlen); + count++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendIndentNewlineUnchecked(enc); + Buffer_AppendIndentUnchecked(enc, enc->level); + Buffer_AppendCharUnchecked(enc, '}'); + break; + } + + case JT_LONG: { + Buffer_AppendLongUnchecked(enc, enc->getLongValue(obj, &tc)); + break; + } + + case JT_INT: { + Buffer_AppendIntUnchecked(enc, enc->getIntValue(obj, &tc)); + break; + } + + case JT_TRUE: { + Buffer_AppendCharUnchecked(enc, 't'); + Buffer_AppendCharUnchecked(enc, 'r'); + Buffer_AppendCharUnchecked(enc, 'u'); + Buffer_AppendCharUnchecked(enc, 'e'); + break; + } + + case JT_FALSE: { + Buffer_AppendCharUnchecked(enc, 'f'); + Buffer_AppendCharUnchecked(enc, 'a'); + Buffer_AppendCharUnchecked(enc, 'l'); + Buffer_AppendCharUnchecked(enc, 's'); + Buffer_AppendCharUnchecked(enc, 'e'); + break; + } + + case JT_NULL: { + 
Buffer_AppendCharUnchecked(enc, 'n'); + Buffer_AppendCharUnchecked(enc, 'u'); + Buffer_AppendCharUnchecked(enc, 'l'); + Buffer_AppendCharUnchecked(enc, 'l'); + break; + } + + case JT_DOUBLE: { + if (!Buffer_AppendDoubleUnchecked(obj, enc, + enc->getDoubleValue(obj, &tc))) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + break; + } + + case JT_UTF8: { + value = enc->getStringValue(obj, &tc, &szlen); + if (enc->errorMsg) { + enc->endTypeContext(obj, &tc); + return; + } + Buffer_Reserve(enc, RESERVE_STRING(szlen)); + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } + + Buffer_AppendCharUnchecked(enc, '\"'); + break; + } + + case JT_BIGNUM: { + value = enc->getBigNumStringValue(obj, &tc, &szlen); + + Buffer_Reserve(enc, RESERVE_STRING(szlen)); + if (enc->errorMsg) { + enc->endTypeContext(obj, &tc); + return; + } + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } + + break; + } + } + + enc->endTypeContext(obj, &tc); + enc->level--; +} + +char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, + size_t _cbBuffer) { + char *locale; + enc->malloc = enc->malloc ? enc->malloc : malloc; + enc->free = enc->free ? enc->free : free; + enc->realloc = enc->realloc ? enc->realloc : realloc; + enc->errorMsg = NULL; + enc->errorObj = NULL; + enc->level = 0; + + if (enc->recursionMax < 1) { + enc->recursionMax = JSON_MAX_RECURSION_DEPTH; + } + + if (enc->doublePrecision < 0 || + enc->doublePrecision > JSON_DOUBLE_MAX_DECIMALS) { + enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS; + } + + if (_buffer == NULL) { + _cbBuffer = 32768; + enc->start = (char *)enc->malloc(_cbBuffer); + if (!enc->start) { + SetError(obj, enc, "Could not reserve memory block"); + return NULL; + } + enc->heap = 1; + } else { + enc->start = _buffer; + enc->heap = 0; + } + + enc->end = enc->start + _cbBuffer; + enc->offset = enc->start; + + locale = setlocale(LC_NUMERIC, NULL); + if (!locale) { + SetError(NULL, enc, "setlocale call failed"); + return NULL; + } + + if (strcmp(locale, "C")) { + size_t len = strlen(locale) + 1; + char *saved_locale = malloc(len); + if (saved_locale == NULL) { + SetError(NULL, enc, "Could not reserve memory block"); + return NULL; + } + memcpy(saved_locale, locale, len); + setlocale(LC_NUMERIC, "C"); + encode(obj, enc, NULL, 0); + setlocale(LC_NUMERIC, saved_locale); + free(saved_locale); + } else { + encode(obj, enc, NULL, 0); + } + + Buffer_Reserve(enc, 1); + if (enc->errorMsg) { + return NULL; + } + Buffer_AppendCharUnchecked(enc, '\0'); + + return enc->start; +} diff --git a/pandas/_libs/src/vendored/ujson/python/JSONtoObj.c b/pandas/_libs/src/vendored/ujson/python/JSONtoObj.c new file mode 100644 index 0000000000000..f4055fcedcfa6 --- /dev/null +++ b/pandas/_libs/src/vendored/ujson/python/JSONtoObj.c @@ -0,0 +1,520 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#define NO_IMPORT_ARRAY +#define PY_SSIZE_T_CLEAN +#include +#include +#include "pandas/vendored/ujson/lib/ultrajson.h" + +#define PRINTMARK() + +typedef struct __PyObjectDecoder { + JSONObjectDecoder dec; + + void *npyarr; // Numpy context buffer + void *npyarr_addr; // Ref to npyarr ptr to track DECREF calls + npy_intp curdim; // Current array dimension + + PyArray_Descr *dtype; +} PyObjectDecoder; + +typedef struct __NpyArrContext { + PyObject *ret; + PyObject *labels[2]; + PyArray_Dims shape; + + PyObjectDecoder *dec; + + npy_intp i; + npy_intp elsize; + npy_intp elcount; +} NpyArrContext; + +// Numpy handling based on numpy internal code, specifically the function +// PyArray_FromIter. + +// numpy related functions are inter-dependent so declare them all here, +// to ensure the compiler catches any errors + +// standard numpy array handling +JSOBJ Object_npyNewArray(void *prv, void *decoder); +JSOBJ Object_npyEndArray(void *prv, JSOBJ obj); +int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value); + +// for more complex dtypes (object and string) fill a standard Python list +// and convert to a numpy array when done. 
+JSOBJ Object_npyNewArrayList(void *prv, void *decoder); +JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj); +int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value); + +// free the numpy context buffer +void Npy_releaseContext(NpyArrContext *npyarr) { + PRINTMARK(); + if (npyarr) { + if (npyarr->shape.ptr) { + PyObject_Free(npyarr->shape.ptr); + } + if (npyarr->dec) { + npyarr->dec->npyarr = NULL; + npyarr->dec->curdim = 0; + } + Py_XDECREF(npyarr->labels[0]); + Py_XDECREF(npyarr->labels[1]); + Py_XDECREF(npyarr->ret); + PyObject_Free(npyarr); + } +} + +JSOBJ Object_npyNewArray(void *prv, void *_decoder) { + NpyArrContext *npyarr; + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + if (decoder->curdim <= 0) { + // start of array - initialise the context buffer + npyarr = decoder->npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + decoder->npyarr_addr = npyarr; + + if (!npyarr) { + PyErr_NoMemory(); + return NULL; + } + + npyarr->dec = decoder; + npyarr->labels[0] = npyarr->labels[1] = NULL; + + npyarr->shape.ptr = PyObject_Malloc(sizeof(npy_intp) * NPY_MAXDIMS); + npyarr->shape.len = 1; + npyarr->ret = NULL; + + npyarr->elsize = 0; + npyarr->elcount = 4; + npyarr->i = 0; + } else { + // starting a new dimension continue the current array (and reshape + // after) + npyarr = (NpyArrContext *)decoder->npyarr; + if (decoder->curdim >= npyarr->shape.len) { + npyarr->shape.len++; + } + } + + npyarr->shape.ptr[decoder->curdim] = 0; + decoder->curdim++; + return npyarr; +} + +PyObject *Npy_returnLabelled(NpyArrContext *npyarr) { + PyObject *ret = npyarr->ret; + npy_intp i; + + if (npyarr->labels[0] || npyarr->labels[1]) { + // finished decoding, build tuple with values and labels + ret = PyTuple_New(npyarr->shape.len + 1); + for (i = 0; i < npyarr->shape.len; i++) { + if (npyarr->labels[i]) { + PyTuple_SET_ITEM(ret, i + 1, npyarr->labels[i]); + npyarr->labels[i] = NULL; + } else { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(ret, i + 1, Py_None); + } + } + PyTuple_SET_ITEM(ret, 0, npyarr->ret); + } + + return ret; +} + +JSOBJ Object_npyEndArray(void *prv, JSOBJ obj) { + PyObject *ret; + char *new_data; + NpyArrContext *npyarr = (NpyArrContext *)obj; + int emptyType = NPY_DEFAULT_TYPE; + npy_intp i; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + ret = npyarr->ret; + i = npyarr->i; + + npyarr->dec->curdim--; + + if (i == 0 || !npyarr->ret) { + // empty array would not have been initialised so do it now. 
+ if (npyarr->dec->dtype) { + emptyType = npyarr->dec->dtype->type_num; + } + npyarr->ret = ret = + PyArray_EMPTY(npyarr->shape.len, npyarr->shape.ptr, emptyType, 0); + } else if (npyarr->dec->curdim <= 0) { + // realloc to final size + new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * npyarr->elsize); + if (new_data == NULL) { + PyErr_NoMemory(); + Npy_releaseContext(npyarr); + return NULL; + } + ((PyArrayObject *)ret)->data = (void *)new_data; + // PyArray_BYTES(ret) = new_data; + } + + if (npyarr->dec->curdim <= 0) { + // finished decoding array, reshape if necessary + if (npyarr->shape.len > 1) { + npyarr->ret = PyArray_Newshape((PyArrayObject *)ret, &npyarr->shape, + NPY_ANYORDER); + Py_DECREF(ret); + } + + ret = Npy_returnLabelled(npyarr); + + npyarr->ret = NULL; + Npy_releaseContext(npyarr); + } + + return ret; +} + +int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { + PyObject *type; + PyArray_Descr *dtype; + npy_intp i; + char *new_data, *item; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + + i = npyarr->i; + + npyarr->shape.ptr[npyarr->dec->curdim - 1]++; + + if (PyArray_Check((PyObject *)value)) { + // multidimensional array, keep decoding values. + return 1; + } + + if (!npyarr->ret) { + // Array not initialised yet. + // We do it here so we can 'sniff' the data type if none was provided + if (!npyarr->dec->dtype) { + type = PyObject_Type(value); + if (!PyArray_DescrConverter(type, &dtype)) { + Py_DECREF(type); + goto fail; + } + Py_INCREF(dtype); + Py_DECREF(type); + } else { + dtype = PyArray_DescrNew(npyarr->dec->dtype); + } + + // If it's an object or string then fill a Python list and subsequently + // convert. Otherwise we would need to somehow mess about with + // reference counts when renewing memory. + npyarr->elsize = dtype->elsize; + if (PyDataType_REFCHK(dtype) || npyarr->elsize == 0) { + Py_XDECREF(dtype); + + if (npyarr->dec->curdim > 1) { + PyErr_SetString(PyExc_ValueError, + "Cannot decode multidimensional arrays with " + "variable length elements to numpy"); + goto fail; + } + npyarr->elcount = 0; + npyarr->ret = PyList_New(0); + if (!npyarr->ret) { + goto fail; + } + ((JSONObjectDecoder *)npyarr->dec)->newArray = + Object_npyNewArrayList; + ((JSONObjectDecoder *)npyarr->dec)->arrayAddItem = + Object_npyArrayListAddItem; + ((JSONObjectDecoder *)npyarr->dec)->endArray = + Object_npyEndArrayList; + return Object_npyArrayListAddItem(prv, obj, value); + } + + npyarr->ret = PyArray_NewFromDescr( + &PyArray_Type, dtype, 1, &npyarr->elcount, NULL, NULL, 0, NULL); + + if (!npyarr->ret) { + goto fail; + } + } + + if (i >= npyarr->elcount) { + // Grow PyArray_DATA(ret): + // this is similar for the strategy for PyListObject, but we use + // 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... + if (npyarr->elsize == 0) { + PyErr_SetString(PyExc_ValueError, + "Cannot decode multidimensional arrays with " + "variable length elements to numpy"); + goto fail; + } + + npyarr->elcount = (i >> 1) + (i < 4 ? 
4 : 2) + i; + if (npyarr->elcount <= NPY_MAX_INTP / npyarr->elsize) { + new_data = PyDataMem_RENEW(PyArray_DATA(npyarr->ret), + npyarr->elcount * npyarr->elsize); + } else { + PyErr_NoMemory(); + goto fail; + } + ((PyArrayObject *)npyarr->ret)->data = (void *)new_data; + + // PyArray_BYTES(npyarr->ret) = new_data; + } + + PyArray_DIMS(npyarr->ret)[0] = i + 1; + + if ((item = PyArray_GETPTR1(npyarr->ret, i)) == NULL || + PyArray_SETITEM(npyarr->ret, item, value) == -1) { + goto fail; + } + + Py_DECREF((PyObject *)value); + npyarr->i++; + return 1; + +fail: + + Npy_releaseContext(npyarr); + return 0; +} + +JSOBJ Object_npyNewArrayList(void *prv, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + PyErr_SetString( + PyExc_ValueError, + "nesting not supported for object or variable length dtypes"); + Npy_releaseContext(decoder->npyarr); + return NULL; +} + +JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj) { + PyObject *list, *ret; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + // convert decoded list to numpy array + list = (PyObject *)npyarr->ret; + npyarr->ret = PyArray_FROM_O(list); + + ret = Npy_returnLabelled(npyarr); + npyarr->ret = list; + + ((JSONObjectDecoder *)npyarr->dec)->newArray = Object_npyNewArray; + ((JSONObjectDecoder *)npyarr->dec)->arrayAddItem = Object_npyArrayAddItem; + ((JSONObjectDecoder *)npyarr->dec)->endArray = Object_npyEndArray; + Npy_releaseContext(npyarr); + return ret; +} + +int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value) { + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + PyList_Append((PyObject *)npyarr->ret, value); + Py_DECREF((PyObject *)value); + npyarr->elcount++; + return 1; +} + +int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { + int ret = PyDict_SetItem(obj, name, value); + Py_DECREF((PyObject *)name); + Py_DECREF((PyObject *)value); + return ret == 0 ? 1 : 0; +} + +int Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { + int ret = PyList_Append(obj, value); + Py_DECREF((PyObject *)value); + return ret == 0 ? 
1 : 0; +} + +JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end) { + return PyUnicode_FromWideChar(start, (end - start)); +} + +JSOBJ Object_newTrue(void *prv) { Py_RETURN_TRUE; } + +JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; } + +JSOBJ Object_newNull(void *prv) { Py_RETURN_NONE; } + +JSOBJ Object_newPosInf(void *prv) { return PyFloat_FromDouble(Py_HUGE_VAL); } + +JSOBJ Object_newNegInf(void *prv) { return PyFloat_FromDouble(-Py_HUGE_VAL); } + +JSOBJ Object_newObject(void *prv, void *decoder) { return PyDict_New(); } + +JSOBJ Object_endObject(void *prv, JSOBJ obj) { return obj; } + +JSOBJ Object_newArray(void *prv, void *decoder) { return PyList_New(0); } + +JSOBJ Object_endArray(void *prv, JSOBJ obj) { return obj; } + +JSOBJ Object_newInteger(void *prv, JSINT32 value) { + return PyLong_FromLong((long)value); +} + +JSOBJ Object_newLong(void *prv, JSINT64 value) { + return PyLong_FromLongLong(value); +} + +JSOBJ Object_newUnsignedLong(void *prv, JSUINT64 value) { + return PyLong_FromUnsignedLongLong(value); +} + +JSOBJ Object_newDouble(void *prv, double value) { + return PyFloat_FromDouble(value); +} + +static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + if (obj != decoder->npyarr_addr) { + Py_XDECREF(((PyObject *)obj)); + } +} + +static char *g_kwlist[] = {"obj", "precise_float", + "labelled", "dtype", NULL}; + +PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) { + PyObject *ret; + PyObject *sarg; + PyObject *arg; + PyObject *opreciseFloat = NULL; + JSONObjectDecoder *decoder; + PyObjectDecoder pyDecoder; + PyArray_Descr *dtype = NULL; + int labelled = 0; + + JSONObjectDecoder dec = { + Object_newString, Object_objectAddKey, Object_arrayAddItem, + Object_newTrue, Object_newFalse, Object_newNull, + Object_newPosInf, Object_newNegInf, Object_newObject, + Object_endObject, Object_newArray, Object_endArray, + Object_newInteger, Object_newLong, Object_newUnsignedLong, + Object_newDouble, + Object_releaseObject, PyObject_Malloc, PyObject_Free, + PyObject_Realloc}; + + dec.preciseFloat = 0; + dec.prv = NULL; + + pyDecoder.dec = dec; + pyDecoder.curdim = 0; + pyDecoder.npyarr = NULL; + pyDecoder.npyarr_addr = NULL; + + decoder = (JSONObjectDecoder *)&pyDecoder; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiiO&", g_kwlist, &arg, + &opreciseFloat, &labelled, + PyArray_DescrConverter2, &dtype)) { + Npy_releaseContext(pyDecoder.npyarr); + return NULL; + } + + if (opreciseFloat && PyObject_IsTrue(opreciseFloat)) { + decoder->preciseFloat = 1; + } + + if (PyBytes_Check(arg)) { + sarg = arg; + } else if (PyUnicode_Check(arg)) { + sarg = PyUnicode_AsUTF8String(arg); + if (sarg == NULL) { + // Exception raised above us by codec according to docs + return NULL; + } + } else { + PyErr_Format(PyExc_TypeError, "Expected 'str' or 'bytes'"); + return NULL; + } + + decoder->errorStr = NULL; + decoder->errorOffset = NULL; + + ret = JSON_DecodeObject(decoder, PyBytes_AS_STRING(sarg), + PyBytes_GET_SIZE(sarg)); + + if (sarg != arg) { + Py_DECREF(sarg); + } + + if (PyErr_Occurred()) { + if (ret) { + Py_DECREF((PyObject *)ret); + } + Npy_releaseContext(pyDecoder.npyarr); + return NULL; + } + + if (decoder->errorStr) { + /* + FIXME: It's possible to give a much nicer error message here with actual + failing element in input etc*/ + + PyErr_Format(PyExc_ValueError, "%s", decoder->errorStr); + + if (ret) { + Py_DECREF((PyObject *)ret); + } + Npy_releaseContext(pyDecoder.npyarr); + + return 
NULL; + } + + return ret; +} diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c new file mode 100644 index 0000000000000..65b468f268d75 --- /dev/null +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -0,0 +1,2135 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. 
+*/ + +#define PY_SSIZE_T_CLEAN +#include +#include + +#define NO_IMPORT_ARRAY +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#include +#include +#include +#include +#include "pandas/vendored/ujson/lib/ultrajson.h" +#include "datetime.h" +#include "pandas/datetime/pd_datetime.h" + +npy_int64 get_nat(void) { return NPY_MIN_INT64; } + +typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti, + size_t *_outLen); + +int object_is_decimal_type(PyObject *obj); +int object_is_dataframe_type(PyObject *obj); +int object_is_series_type(PyObject *obj); +int object_is_index_type(PyObject *obj); +int object_is_nat_type(PyObject *obj); +int object_is_na_type(PyObject *obj); + +typedef struct __NpyArrContext { + PyObject *array; + char *dataptr; + int curdim; // current dimension in array's order + int stridedim; // dimension we are striding over + int inc; // stride dimension increment (+/- 1) + npy_intp dim; + npy_intp stride; + npy_intp ndim; + npy_intp index[NPY_MAXDIMS]; + int type_num; + PyArray_GetItemFunc *getitem; + + char **rowLabels; + char **columnLabels; +} NpyArrContext; + +typedef struct __PdBlockContext { + int colIdx; + int ncols; + int transpose; + + NpyArrContext **npyCtxts; // NpyArrContext for each column +} PdBlockContext; + +typedef struct __TypeContext { + JSPFN_ITERBEGIN iterBegin; + JSPFN_ITEREND iterEnd; + JSPFN_ITERNEXT iterNext; + JSPFN_ITERGETNAME iterGetName; + JSPFN_ITERGETVALUE iterGetValue; + PFN_PyTypeToUTF8 PyTypeToUTF8; + PyObject *newObj; + PyObject *dictObj; + Py_ssize_t index; + Py_ssize_t size; + PyObject *itemValue; + PyObject *itemName; + PyObject *attrList; + PyObject *iterator; + + double doubleValue; + JSINT64 longValue; + + char *cStr; + NpyArrContext *npyarr; + PdBlockContext *pdblock; + int transpose; + char **rowLabels; + char **columnLabels; + npy_intp rowLabelsLen; + npy_intp columnLabelsLen; +} TypeContext; + +typedef struct __PyObjectEncoder { + JSONObjectEncoder enc; + + // pass through the NpyArrContext when encoding multi-dimensional arrays + NpyArrContext *npyCtxtPassthru; + + // pass through the PdBlockContext when encoding blocks + PdBlockContext *blkCtxtPassthru; + + // pass-through to encode numpy data directly + int npyType; + void *npyValue; + + int datetimeIso; + NPY_DATETIMEUNIT datetimeUnit; + + // output format style for pandas data types + int outputFormat; + int originalOutputFormat; + + PyObject *defaultHandler; +} PyObjectEncoder; + +#define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv)) + +enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES }; + +int PdBlock_iterNext(JSOBJ, JSONTypeContext *); + +static TypeContext *createTypeContext(void) { + TypeContext *pc; + + pc = PyObject_Malloc(sizeof(TypeContext)); + if (!pc) { + PyErr_NoMemory(); + return NULL; + } + pc->newObj = NULL; + pc->dictObj = NULL; + pc->itemValue = NULL; + pc->itemName = NULL; + pc->attrList = NULL; + pc->index = 0; + pc->size = 0; + pc->longValue = 0; + pc->doubleValue = 0.0; + pc->cStr = NULL; + pc->npyarr = NULL; + pc->pdblock = NULL; + pc->rowLabels = NULL; + pc->columnLabels = NULL; + pc->transpose = 0; + pc->rowLabelsLen = 0; + pc->columnLabelsLen = 0; + + return pc; +} + +static PyObject *get_values(PyObject *obj) { + PyObject *values = NULL; + + if (object_is_index_type(obj) || object_is_series_type(obj)) { + // The special cases to worry about are dt64tz and category[dt64tz]. + // In both cases we want the UTC-localized datetime64 ndarray, + // without going through and object array of Timestamps. 
+ if (PyObject_HasAttrString(obj, "tz")) { + PyObject *tz = PyObject_GetAttrString(obj, "tz"); + if (tz != Py_None) { + // Go through object array if we have dt64tz, since tz info will + // be lost if values is used directly. + Py_DECREF(tz); + values = PyObject_CallMethod(obj, "__array__", NULL); + return values; + } + Py_DECREF(tz); + } + values = PyObject_GetAttrString(obj, "values"); + if (values == NULL) { + // Clear so we can subsequently try another method + PyErr_Clear(); + } else if (PyObject_HasAttrString(values, "__array__")) { + // We may have gotten a Categorical or Sparse array so call np.array + PyObject *array_values = PyObject_CallMethod(values, "__array__", + NULL); + Py_DECREF(values); + values = array_values; + } else if (!PyArray_CheckExact(values)) { + // Didn't get a numpy array, so keep trying + Py_DECREF(values); + values = NULL; + } + } + + if (values == NULL) { + PyObject *typeRepr = PyObject_Repr((PyObject *)Py_TYPE(obj)); + PyObject *repr; + if (PyObject_HasAttrString(obj, "dtype")) { + PyObject *dtype = PyObject_GetAttrString(obj, "dtype"); + repr = PyObject_Repr(dtype); + Py_DECREF(dtype); + } else { + repr = PyUnicode_FromString(""); + } + + PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet", + repr, typeRepr); + Py_DECREF(repr); + Py_DECREF(typeRepr); + + return NULL; + } + + return values; +} + +static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) { + PyObject *tmp = PyObject_GetAttrString(obj, attr); + PyObject *ret; + + if (tmp == 0) { + return 0; + } + ret = PyObject_GetAttrString(tmp, subAttr); + Py_DECREF(tmp); + + return ret; +} + +static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { + PyObject *tmp = PyObject_GetAttrString(obj, attr); + Py_ssize_t ret; + + if (tmp == 0) { + return 0; + } + ret = PyObject_Length(tmp); + Py_DECREF(tmp); + + if (ret == -1) { + return 0; + } + + return ret; +} + +static int is_simple_frame(PyObject *obj) { + PyObject *mgr = PyObject_GetAttrString(obj, "_mgr"); + if (!mgr) { + return 0; + } + int ret; + if (PyObject_HasAttrString(mgr, "blocks")) { + ret = (get_attr_length(mgr, "blocks") <= 1); + } else { + ret = 0; + } + + Py_DECREF(mgr); + return ret; +} + +static npy_int64 get_long_attr(PyObject *o, const char *attr) { + // NB we are implicitly assuming that o is a Timedelta or Timestamp, or NaT + + npy_int64 long_val; + PyObject *value = PyObject_GetAttrString(o, attr); + long_val = + (PyLong_Check(value) ? PyLong_AsLongLong(value) : PyLong_AsLong(value)); + + Py_DECREF(value); + + if (object_is_nat_type(o)) { + // i.e. 
o is NaT, long_val will be NPY_MIN_INT64 + return long_val; + } + + // ensure we are in nanoseconds, similar to Timestamp._as_creso or _as_unit + PyObject* reso = PyObject_GetAttrString(o, "_creso"); + if (!PyLong_Check(reso)) { + // https://github.com/pandas-dev/pandas/pull/49034#discussion_r1023165139 + Py_DECREF(reso); + return -1; + } + + long cReso = PyLong_AsLong(reso); + Py_DECREF(reso); + if (cReso == -1 && PyErr_Occurred()) { + return -1; + } + + if (cReso == NPY_FR_us) { + long_val = long_val * 1000L; + } else if (cReso == NPY_FR_ms) { + long_val = long_val * 1000000L; + } else if (cReso == NPY_FR_s) { + long_val = long_val * 1000000000L; + } + + return long_val; +} + +static npy_float64 total_seconds(PyObject *td) { + npy_float64 double_val; + PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); + double_val = PyFloat_AS_DOUBLE(value); + Py_DECREF(value); + return double_val; +} + +static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), + size_t *_outLen) { + PyObject *obj = (PyObject *)_obj; + *_outLen = PyBytes_GET_SIZE(obj); + return PyBytes_AS_STRING(obj); +} + +static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, + size_t *_outLen) { + char *encoded = (char *)PyUnicode_AsUTF8AndSize(_obj, + (Py_ssize_t *)_outLen); + if (encoded == NULL) { + /* Something went wrong. + Set errorMsg(to tell encoder to stop), + and let Python exception propagate. */ + JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder; + enc->errorMsg = "Encoding failed."; + } + return encoded; +} + +/* JSON callback. returns a char* and mutates the pointer to *len */ +static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), + JSONTypeContext *tc, size_t *len) { + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, base, len); + return GET_TC(tc)->cStr; +} + +/* JSON callback. 
returns a char* and mutates the pointer to *len */ +static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused), + JSONTypeContext *tc, size_t *len) { + GET_TC(tc)->cStr = int64ToIsoDuration(GET_TC(tc)->longValue, len); + return GET_TC(tc)->cStr; +} + +/* JSON callback */ +static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, + size_t *len) { + if (!PyDate_Check(obj)) { + PyErr_SetString(PyExc_TypeError, "Expected date object"); + return NULL; + } + + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + return PyDateTimeToIso(obj, base, len); +} + +static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { + PyObject *obj = (PyObject *)_obj; + PyObject *str; + PyObject *tmp; + + str = PyObject_CallMethod(obj, "isoformat", NULL); + if (str == NULL) { + *outLen = 0; + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "Failed to convert time"); + } + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + return NULL; + } + if (PyUnicode_Check(str)) { + tmp = str; + str = PyUnicode_AsUTF8String(str); + Py_DECREF(tmp); + } + + GET_TC(tc)->newObj = str; + + *outLen = PyBytes_GET_SIZE(str); + char *outValue = PyBytes_AS_STRING(str); + return outValue; +} + +//============================================================================= +// Numpy array iteration functions +//============================================================================= + +static void NpyArr_freeItemValue(JSOBJ Py_UNUSED(_obj), JSONTypeContext *tc) { + if (GET_TC(tc)->npyarr && + GET_TC(tc)->itemValue != GET_TC(tc)->npyarr->array) { + Py_XDECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } +} + +int NpyArr_iterNextNone(JSOBJ Py_UNUSED(_obj), JSONTypeContext *Py_UNUSED(tc)) { + return 0; +} + +void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { + PyArrayObject *obj; + NpyArrContext *npyarr; + + if (GET_TC(tc)->newObj) { + obj = (PyArrayObject *)GET_TC(tc)->newObj; + } else { + obj = (PyArrayObject *)_obj; + } + + npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + GET_TC(tc)->npyarr = npyarr; + + if (!npyarr) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + npyarr->array = (PyObject *)obj; + npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem; + npyarr->dataptr = PyArray_DATA(obj); + npyarr->ndim = PyArray_NDIM(obj) - 1; + npyarr->curdim = 0; + npyarr->type_num = PyArray_DESCR(obj)->type_num; + + if (GET_TC(tc)->transpose) { + npyarr->dim = PyArray_DIM(obj, npyarr->ndim); + npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); + npyarr->stridedim = npyarr->ndim; + npyarr->index[npyarr->ndim] = 0; + npyarr->inc = -1; + } else { + npyarr->dim = PyArray_DIM(obj, 0); + npyarr->stride = PyArray_STRIDE(obj, 0); + npyarr->stridedim = 0; + npyarr->index[0] = 0; + npyarr->inc = 1; + } + + npyarr->columnLabels = GET_TC(tc)->columnLabels; + npyarr->rowLabels = GET_TC(tc)->rowLabels; +} + +void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (npyarr) { + NpyArr_freeItemValue(obj, tc); + PyObject_Free(npyarr); + } +} + +void NpyArrPassThru_iterBegin(JSOBJ Py_UNUSED(obj), + JSONTypeContext *Py_UNUSED(tc)) {} + +void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + // finished this dimension, reset the data pointer + npyarr->curdim--; + npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; + npyarr->stridedim -= npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, 
npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->dataptr += npyarr->stride; + + NpyArr_freeItemValue(obj, tc); +} + +int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (PyErr_Occurred()) { + return 0; + } + + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { + return 0; + } + + NpyArr_freeItemValue(obj, tc); + + if (PyArray_ISDATETIME(npyarr->array)) { + GET_TC(tc)->itemValue = obj; + Py_INCREF(obj); + ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array); + ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + } else { + GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); + } + + npyarr->dataptr += npyarr->stride; + npyarr->index[npyarr->stridedim]++; + return 1; +} + +int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (PyErr_Occurred()) { + return 0; + } + + if (npyarr->curdim >= npyarr->ndim || + npyarr->index[npyarr->stridedim] >= npyarr->dim) { + // innermost dimension, start retrieving item values + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + return NpyArr_iterNextItem(_obj, tc); + } + + // dig a dimension deeper + npyarr->index[npyarr->stridedim]++; + + npyarr->curdim++; + npyarr->stridedim += npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->index[npyarr->stridedim] = 0; + + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + GET_TC(tc)->itemValue = npyarr->array; + return 1; +} + +JSOBJ NpyArr_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + npy_intp idx; + char *cStr; + + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { + idx = npyarr->index[npyarr->stridedim] - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + + return cStr; +} + +//============================================================================= +// Pandas block iteration functions +// +// Serialises a DataFrame column by column to avoid unnecessary data copies and +// more representative serialisation when dealing with mixed dtypes. +// +// Uses a dedicated NpyArrContext for each column. 
+//============================================================================= + +void PdBlockPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->transpose) { + blkCtxt->colIdx++; + } else { + blkCtxt->colIdx = 0; + } + + NpyArr_freeItemValue(obj, tc); +} + +int PdBlock_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->colIdx >= blkCtxt->ncols) { + return 0; + } + + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + blkCtxt->colIdx++; + return NpyArr_iterNextItem(obj, tc); +} + +char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; + npy_intp idx; + char *cStr; + + if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { + idx = blkCtxt->colIdx - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = GET_TC(tc)->iterNext != PdBlock_iterNext + ? npyarr->index[npyarr->stridedim - npyarr->inc] - 1 + : npyarr->index[npyarr->stridedim]; + + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + return cStr; +} + +char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + npy_intp idx; + char *cStr; + + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { + idx = npyarr->index[npyarr->stridedim] - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = blkCtxt->colIdx; + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + return cStr; +} + +int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr; + + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + + if (blkCtxt->transpose) { + if (blkCtxt->colIdx >= blkCtxt->ncols) { + return 0; + } + } else { + npyarr = blkCtxt->npyCtxts[0]; + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { + return 0; + } + } + + ((PyObjectEncoder *)tc->encoder)->blkCtxtPassthru = blkCtxt; + GET_TC(tc)->itemValue = obj; + + return 1; +} + +void PdBlockPassThru_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->transpose) { + // if transposed we exhaust each column before moving to the next + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + GET_TC(tc)->iterGetName = PdBlock_iterGetName_Transpose; + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + } +} + +void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj, *values, *arrays, *array; + PdBlockContext *blkCtxt; + NpyArrContext *npyarr; + Py_ssize_t i; + + obj = (PyObject *)_obj; + + GET_TC(tc)->iterGetName = GET_TC(tc)->transpose + ? 
PdBlock_iterGetName_Transpose + : PdBlock_iterGetName; + + blkCtxt = PyObject_Malloc(sizeof(PdBlockContext)); + if (!blkCtxt) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + GET_TC(tc)->pdblock = blkCtxt; + + blkCtxt->colIdx = 0; + blkCtxt->transpose = GET_TC(tc)->transpose; + blkCtxt->ncols = get_attr_length(obj, "columns"); + + if (blkCtxt->ncols == 0) { + blkCtxt->npyCtxts = NULL; + + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + blkCtxt->npyCtxts = + PyObject_Malloc(sizeof(NpyArrContext *) * blkCtxt->ncols); + if (!blkCtxt->npyCtxts) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + arrays = get_sub_attr(obj, "_mgr", "column_arrays"); + if (!arrays) { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + for (i = 0; i < PyObject_Length(arrays); i++) { + array = PyList_GET_ITEM(arrays, i); + if (!array) { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto ARR_RET; + } + + // ensure we have a numpy array (i.e. np.asarray) + values = PyObject_CallMethod(array, "__array__", NULL); + if ((!values) || (!PyArray_CheckExact(values))) { + // Didn't get a numpy array + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto ARR_RET; + } + + GET_TC(tc)->newObj = values; + + // init a dedicated context for this column + NpyArr_iterBegin(obj, tc); + npyarr = GET_TC(tc)->npyarr; + + GET_TC(tc)->itemValue = NULL; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL; + + blkCtxt->npyCtxts[i] = npyarr; + GET_TC(tc)->newObj = NULL; + } + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[0]; + goto ARR_RET; + +ARR_RET: + Py_DECREF(arrays); +} + +void PdBlock_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt; + NpyArrContext *npyarr; + int i; + + GET_TC(tc)->itemValue = NULL; + npyarr = GET_TC(tc)->npyarr; + + blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt) { + for (i = 0; i < blkCtxt->ncols; i++) { + npyarr = blkCtxt->npyCtxts[i]; + if (npyarr) { + if (npyarr->array) { + Py_DECREF(npyarr->array); + npyarr->array = NULL; + } + + GET_TC(tc)->npyarr = npyarr; + NpyArr_iterEnd(obj, tc); + + blkCtxt->npyCtxts[i] = NULL; + } + } + + if (blkCtxt->npyCtxts) { + PyObject_Free(blkCtxt->npyCtxts); + } + PyObject_Free(blkCtxt); + } +} + +//============================================================================= +// Tuple iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= +void Tuple_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyTuple_GET_SIZE((PyObject *)obj); + GET_TC(tc)->itemValue = NULL; +} + +int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) { + PyObject *item; + + if (GET_TC(tc)->index >= GET_TC(tc)->size) { + return 0; + } + + item = PyTuple_GET_ITEM(obj, GET_TC(tc)->index); + + GET_TC(tc)->itemValue = item; + GET_TC(tc)->index++; + return 1; +} + +void Tuple_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ Tuple_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// Set iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= 
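+// (Note: the iterator protocol used below returns owned references;
+// Set_iterNext and Set_iterEnd release them explicitly.)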
+void Set_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->itemValue = NULL; + GET_TC(tc)->iterator = PyObject_GetIter(obj); +} + +int Set_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObject *item; + + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + item = PyIter_Next(GET_TC(tc)->iterator); + + if (item == NULL) { + return 0; + } + + GET_TC(tc)->itemValue = item; + return 1; +} + +void Set_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->iterator) { + Py_DECREF(GET_TC(tc)->iterator); + GET_TC(tc)->iterator = NULL; + } +} + +JSOBJ Set_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Set_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// Dir iteration functions +// itemName ref is borrowed from PyObject_Dir (attrList). No refcount +// itemValue ref is from PyObject_GetAttr. Ref counted +//============================================================================= +void Dir_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->attrList = PyObject_Dir(obj); + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE(GET_TC(tc)->attrList); +} + +void Dir_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + Py_DECREF((PyObject *)GET_TC(tc)->attrList); +} + +int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj = (PyObject *)_obj; + PyObject *itemValue = GET_TC(tc)->itemValue; + PyObject *itemName = GET_TC(tc)->itemName; + PyObject *attr; + PyObject *attrName; + char *attrStr; + + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + + if (itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = itemValue = NULL; + } + + if (itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = itemName = NULL; + } + + for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index++) { + attrName = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); + attr = PyUnicode_AsUTF8String(attrName); + attrStr = PyBytes_AS_STRING(attr); + + if (attrStr[0] == '_') { + Py_DECREF(attr); + continue; + } + + itemValue = PyObject_GetAttr(obj, attrName); + if (itemValue == NULL) { + PyErr_Clear(); + Py_DECREF(attr); + continue; + } + + if (PyCallable_Check(itemValue)) { + Py_DECREF(itemValue); + Py_DECREF(attr); + continue; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + + itemName = attr; + break; + } + + if (itemName == NULL) { + GET_TC(tc)->index = GET_TC(tc)->size; + GET_TC(tc)->itemValue = NULL; + return 0; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + GET_TC(tc)->index++; + + return 1; +} + +JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); + return PyBytes_AS_STRING(GET_TC(tc)->itemName); +} + 
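+//=============================================================================
+// Usage sketch (illustrative only; assumes the extension is importable as
+// pandas._libs.json, the module name registered in ujson.c further below):
+//
+//   >>> import pandas as pd
+//   >>> from pandas._libs import json as ujson
+//   >>> ujson.encode(pd.DataFrame({"a": [1, 2], "b": ["x", "y"]}),
+//   ...              orient="records")  # mixed dtypes -> PdBlock path above
+//   '[{"a":1,"b":"x"},{"a":2,"b":"y"}]'
+//
+// Objects with no dedicated handler fall back to the Dir_* functions above,
+// which serialise public, non-callable attributes as a JSON object.
+//=============================================================================
+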
+//============================================================================= +// List iteration functions +// itemValue is borrowed from object (which is list). No refcounting +//============================================================================= +void List_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE((PyObject *)obj); +} + +int List_iterNext(JSOBJ obj, JSONTypeContext *tc) { + if (GET_TC(tc)->index >= GET_TC(tc)->size) { + return 0; + } + + GET_TC(tc)->itemValue = PyList_GET_ITEM(obj, GET_TC(tc)->index); + GET_TC(tc)->index++; + return 1; +} + +void List_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ List_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *List_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// pandas Index iteration functions +//============================================================================= +void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + GET_TC(tc)->itemValue = get_values(obj); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +void Index_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ Index_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// pandas Series iteration functions +//============================================================================= +void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } else if (index == 2) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + GET_TC(tc)->itemValue = get_values(obj); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +void Series_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + 
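+  // Restore the output format that Series_iterBegin forced to VALUES for the
+  // contained data.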
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + enc->outputFormat = enc->originalOutputFormat; +} + +JSOBJ Series_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// pandas DataFrame iteration functions +//============================================================================= +void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series & index + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "columns", sizeof(char) * 8); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } else if (index == 2) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + if (is_simple_frame(obj)) { + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + Py_INCREF(obj); + GET_TC(tc)->itemValue = obj; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +void DataFrame_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + enc->outputFormat = enc->originalOutputFormat; +} + +JSOBJ DataFrame_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// Dict iteration functions +// itemName might converted to string (Python_Str). Do refCounting +// itemValue is borrowed from object (which is dict). 
No refCounting +//============================================================================= +void Dict_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + GET_TC(tc)->index = 0; +} + +int Dict_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObject *itemNameTmp; + + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + if (!PyDict_Next((PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index, + &GET_TC(tc)->itemName, &GET_TC(tc)->itemValue)) { + return 0; + } + + if (PyUnicode_Check(GET_TC(tc)->itemName)) { + GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); + } else if (!PyBytes_Check(GET_TC(tc)->itemName)) { + GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName); + itemNameTmp = GET_TC(tc)->itemName; + GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); + Py_DECREF(itemNameTmp); + } else { + Py_INCREF(GET_TC(tc)->itemName); + } + return 1; +} + +void Dict_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + Py_DECREF(GET_TC(tc)->dictObj); +} + +JSOBJ Dict_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); + return PyBytes_AS_STRING(GET_TC(tc)->itemName); +} + +void NpyArr_freeLabels(char **labels, npy_intp len) { + npy_intp i; + + if (labels) { + for (i = 0; i < len; i++) { + PyObject_Free(labels[i]); + } + PyObject_Free(labels); + } +} + +/* + * Function: NpyArr_encodeLabels + * ----------------------------- + * + * Builds an array of "encoded" labels. + * + * labels: PyArrayObject pointer for labels to be "encoded" + * num : number of labels + * + * "encode" is quoted above because we aren't really doing encoding + * For historical reasons this function would actually encode the entire + * array into a separate buffer with a separate call to JSON_Encode + * and would leave it to complex pointer manipulation from there to + * unpack values as needed. To make things simpler and more idiomatic + * this has instead just stringified any input save for datetime values, + * which may need to be represented in various formats. + */ +char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, + npy_intp num) { + // NOTE this function steals a reference to labels. 
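+  // On success the caller owns a malloc'd array of `num` NUL-terminated
+  // strings and must release it with NpyArr_freeLabels; on failure NULL is
+  // returned (the reference to labels is released either way).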
+ PyObject *item = NULL; + size_t len; + npy_intp i, stride; + char **ret; + char *dataptr, *cLabel; + int type_num; + NPY_DATETIMEUNIT base = enc->datetimeUnit; + + if (!labels) { + return 0; + } + + if (PyArray_SIZE(labels) < num) { + PyErr_SetString( + PyExc_ValueError, + "Label array sizes do not match corresponding data shape"); + Py_DECREF(labels); + return 0; + } + + ret = PyObject_Malloc(sizeof(char *) * num); + if (!ret) { + PyErr_NoMemory(); + Py_DECREF(labels); + return 0; + } + + for (i = 0; i < num; i++) { + ret[i] = NULL; + } + + stride = PyArray_STRIDE(labels, 0); + dataptr = PyArray_DATA(labels); + type_num = PyArray_TYPE(labels); + + for (i = 0; i < num; i++) { + item = PyArray_GETITEM(labels, dataptr); + if (!item) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + int is_datetimelike = 0; + npy_int64 nanosecVal; + if (PyTypeNum_ISDATETIME(type_num)) { + is_datetimelike = 1; + PyArray_VectorUnaryFunc *castfunc = + PyArray_GetCastFunc(PyArray_DescrFromType(type_num), NPY_INT64); + if (!castfunc) { + PyErr_Format(PyExc_ValueError, + "Cannot cast numpy dtype %d to long", + enc->npyType); + } + castfunc(dataptr, &nanosecVal, 1, NULL, NULL); + } else if (PyDate_Check(item) || PyDelta_Check(item)) { + is_datetimelike = 1; + if (PyObject_HasAttrString(item, "_value")) { + // see test_date_index_and_values for case with non-nano + nanosecVal = get_long_attr(item, "_value"); + } else { + if (PyDelta_Check(item)) { + nanosecVal = total_seconds(item) * + 1000000000LL; // nanoseconds per second + } else { + // datetime.* objects don't follow above rules + nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns); + } + } + } + + if (is_datetimelike) { + if (nanosecVal == get_nat()) { + len = 4; + cLabel = PyObject_Malloc(len + 1); + strncpy(cLabel, "null", len + 1); + } else { + if (enc->datetimeIso) { + if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { + cLabel = int64ToIsoDuration(nanosecVal, &len); + } else { + if (type_num == NPY_DATETIME) { + cLabel = int64ToIso(nanosecVal, base, &len); + } else { + cLabel = PyDateTimeToIso(item, base, &len); + } + } + if (cLabel == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + } else { + int size_of_cLabel = 21; // 21 chars for int 64 + cLabel = PyObject_Malloc(size_of_cLabel); + snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT, + NpyDateTimeToEpoch(nanosecVal, base)); + len = strlen(cLabel); + } + } + } else { // Fallback to string representation + // Replace item with the string to keep it alive. 
+ Py_SETREF(item, PyObject_Str(item)); + if (item == NULL) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(item); + len = strlen(cLabel); + } + + // Add 1 to include NULL terminator + ret[i] = PyObject_Malloc(len + 1); + memcpy(ret[i], cLabel, len + 1); + Py_DECREF(item); + + if (is_datetimelike) { + PyObject_Free(cLabel); + } + + if (PyErr_Occurred()) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + if (!ret[i]) { + PyErr_NoMemory(); + ret = 0; + break; + } + + dataptr += stride; + } + + Py_DECREF(labels); + return ret; +} + +void Object_invokeDefaultHandler(PyObject *obj, PyObjectEncoder *enc) { + PyObject *tmpObj = NULL; + tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL); + if (!PyErr_Occurred()) { + if (tmpObj == NULL) { + PyErr_SetString(PyExc_TypeError, + "Failed to execute default handler"); + } else { + encode(tmpObj, (JSONObjectEncoder *)enc, NULL, 0); + } + } + Py_XDECREF(tmpObj); + return; +} + +void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj, *exc, *toDictFunc, *tmpObj, *values; + TypeContext *pc; + PyObjectEncoder *enc; + double val; + npy_int64 value; + int unit; + + tc->prv = NULL; + + if (!_obj) { + tc->type = JT_INVALID; + return; + } + + obj = (PyObject *)_obj; + enc = (PyObjectEncoder *)tc->encoder; + + if (PyBool_Check(obj)) { + tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE; + return; + } else if (obj == Py_None) { + tc->type = JT_NULL; + return; + } + + pc = createTypeContext(); + if (!pc) { + tc->type = JT_INVALID; + return; + } + tc->prv = pc; + + if (PyTypeNum_ISDATETIME(enc->npyType)) { + int64_t longVal; + PyArray_VectorUnaryFunc *castfunc = + PyArray_GetCastFunc(PyArray_DescrFromType(enc->npyType), NPY_INT64); + if (!castfunc) { + PyErr_Format(PyExc_ValueError, "Cannot cast numpy dtype %d to long", + enc->npyType); + } + castfunc(enc->npyValue, &longVal, 1, NULL, NULL); + if (longVal == get_nat()) { + tc->type = JT_NULL; + } else { + if (enc->datetimeIso) { + if (enc->npyType == NPY_TIMEDELTA) { + pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; + } else { + pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; + } + // Currently no way to pass longVal to iso function, so use + // state management + GET_TC(tc)->longValue = longVal; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = + ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base); + tc->type = JT_LONG; + } + } + + // TODO(username): this prevents infinite loop with + // mixed-type DataFrames; + // refactor + enc->npyCtxtPassthru = NULL; + enc->npyType = -1; + return; + } + + if (PyIter_Check(obj) || + (PyArray_Check(obj) && !PyArray_CheckScalar(obj))) { + goto ISITERABLE; + } + + if (PyLong_Check(obj)) { + tc->type = JT_LONG; + int overflow = 0; + GET_TC(tc)->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow); + int err; + err = (GET_TC(tc)->longValue == -1) && PyErr_Occurred(); + + if (overflow) { + tc->type = JT_BIGNUM; + } else if (err) { + goto INVALID; + } + + return; + } else if (PyFloat_Check(obj)) { + val = PyFloat_AS_DOUBLE(obj); + if (npy_isnan(val) || npy_isinf(val)) { + tc->type = JT_NULL; + } else { + GET_TC(tc)->doubleValue = val; + tc->type = JT_DOUBLE; + } + return; + } else if (PyBytes_Check(obj)) { + pc->PyTypeToUTF8 = PyBytesToUTF8; + tc->type = JT_UTF8; + return; + } else if (PyUnicode_Check(obj)) { + pc->PyTypeToUTF8 = PyUnicodeToUTF8; + tc->type = JT_UTF8; + return; + } else if (object_is_decimal_type(obj)) { + 
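+    // Decimal-like objects are coerced with PyFloat_AsDouble and written as
+    // JSON doubles, so precision beyond a C double is lost.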
GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj); + tc->type = JT_DOUBLE; + return; + } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) { + if (object_is_nat_type(obj)) { + tc->type = JT_NULL; + return; + } + + if (enc->datetimeIso) { + pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = + ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); + tc->type = JT_LONG; + } + return; + } else if (PyTime_Check(obj)) { + pc->PyTypeToUTF8 = PyTimeToJSON; + tc->type = JT_UTF8; + return; + } else if (PyArray_IsScalar(obj, Datetime)) { + if (((PyDatetimeScalarObject *)obj)->obval == get_nat()) { + tc->type = JT_NULL; + return; + } + + if (enc->datetimeIso) { + pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = + ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base); + tc->type = JT_LONG; + } + return; + } else if (PyDelta_Check(obj)) { + if (PyObject_HasAttrString(obj, "_value")) { + value = get_long_attr(obj, "_value"); + } else { + value = total_seconds(obj) * 1000000000LL; // nanoseconds per sec + } + + if (value == get_nat()) { + tc->type = JT_NULL; + return; + } else if (enc->datetimeIso) { + pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; + tc->type = JT_UTF8; + } else { + unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + if (scaleNanosecToUnit(&value, unit) != 0) { + // TODO(username): Add some kind of error handling here + } + + exc = PyErr_Occurred(); + + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto INVALID; + } + + tc->type = JT_LONG; + } + GET_TC(tc)->longValue = value; + return; + } else if (PyArray_IsScalar(obj, Integer)) { + tc->type = JT_LONG; + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), + PyArray_DescrFromType(NPY_INT64)); + + exc = PyErr_Occurred(); + + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto INVALID; + } + + return; + } else if (PyArray_IsScalar(obj, Bool)) { + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), + PyArray_DescrFromType(NPY_BOOL)); + tc->type = (GET_TC(tc)->longValue) ? 
JT_TRUE : JT_FALSE; + return; + } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) { + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->doubleValue), + PyArray_DescrFromType(NPY_DOUBLE)); + tc->type = JT_DOUBLE; + return; + } else if (PyArray_Check(obj) && PyArray_CheckScalar(obj)) { + PyErr_Format(PyExc_TypeError, + "%R (0d array) is not JSON serializable at the moment", + obj); + goto INVALID; + } else if (object_is_na_type(obj)) { + tc->type = JT_NULL; + return; + } + +ISITERABLE: + + if (object_is_index_type(obj)) { + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = Index_iterBegin; + pc->iterEnd = Index_iterEnd; + pc->iterNext = Index_iterNext; + pc->iterGetValue = Index_iterGetValue; + pc->iterGetName = Index_iterGetName; + return; + } + + pc->newObj = get_values(obj); + if (pc->newObj) { + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + } else { + goto INVALID; + } + + return; + } else if (object_is_series_type(obj)) { + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = Series_iterBegin; + pc->iterEnd = Series_iterEnd; + pc->iterNext = Series_iterNext; + pc->iterGetValue = Series_iterGetValue; + pc->iterGetName = Series_iterGetName; + return; + } + + pc->newObj = get_values(obj); + if (!pc->newObj) { + goto INVALID; + } + + if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { + tc->type = JT_OBJECT; + tmpObj = PyObject_GetAttrString(obj, "index"); + if (!tmpObj) { + goto INVALID; + } + values = get_values(tmpObj); + Py_DECREF(tmpObj); + if (!values) { + goto INVALID; + } + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + if (!pc->columnLabels) { + goto INVALID; + } + } else { + tc->type = JT_ARRAY; + } + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } else if (PyArray_Check(obj)) { + if (enc->npyCtxtPassthru) { + pc->npyarr = enc->npyCtxtPassthru; + tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY); + + pc->iterBegin = NpyArrPassThru_iterBegin; + pc->iterNext = NpyArr_iterNext; + pc->iterEnd = NpyArrPassThru_iterEnd; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + + enc->npyCtxtPassthru = NULL; + return; + } + + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } else if (object_is_dataframe_type(obj)) { + if (enc->blkCtxtPassthru) { + pc->pdblock = enc->blkCtxtPassthru; + tc->type = + (pc->pdblock->npyCtxts[0]->columnLabels ? 
JT_OBJECT : JT_ARRAY); + + pc->iterBegin = PdBlockPassThru_iterBegin; + pc->iterEnd = PdBlockPassThru_iterEnd; + pc->iterNext = PdBlock_iterNextItem; + pc->iterGetName = PdBlock_iterGetName; + pc->iterGetValue = NpyArr_iterGetValue; + + enc->blkCtxtPassthru = NULL; + return; + } + + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = DataFrame_iterBegin; + pc->iterEnd = DataFrame_iterEnd; + pc->iterNext = DataFrame_iterNext; + pc->iterGetValue = DataFrame_iterGetValue; + pc->iterGetName = DataFrame_iterGetName; + return; + } + + if (is_simple_frame(obj)) { + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetName = NpyArr_iterGetName; + + pc->newObj = PyObject_GetAttrString(obj, "values"); + if (!pc->newObj) { + goto INVALID; + } + } else { + pc->iterBegin = PdBlock_iterBegin; + pc->iterEnd = PdBlock_iterEnd; + pc->iterNext = PdBlock_iterNext; + pc->iterGetName = PdBlock_iterGetName; + } + pc->iterGetValue = NpyArr_iterGetValue; + + if (enc->outputFormat == VALUES) { + tc->type = JT_ARRAY; + } else if (enc->outputFormat == RECORDS) { + tc->type = JT_ARRAY; + tmpObj = PyObject_GetAttrString(obj, "columns"); + if (!tmpObj) { + goto INVALID; + } + values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + goto INVALID; + } + pc->columnLabelsLen = PyObject_Size(tmpObj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + Py_DECREF(tmpObj); + if (!pc->columnLabels) { + goto INVALID; + } + } else if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { + tc->type = JT_OBJECT; + tmpObj = (enc->outputFormat == INDEX + ? PyObject_GetAttrString(obj, "index") + : PyObject_GetAttrString(obj, "columns")); + if (!tmpObj) { + goto INVALID; + } + values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + goto INVALID; + } + pc->rowLabelsLen = PyObject_Size(tmpObj); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->rowLabelsLen); + Py_DECREF(tmpObj); + tmpObj = (enc->outputFormat == INDEX + ? 
PyObject_GetAttrString(obj, "columns") + : PyObject_GetAttrString(obj, "index")); + if (!tmpObj) { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + pc->columnLabelsLen = PyObject_Size(tmpObj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + Py_DECREF(tmpObj); + if (!pc->columnLabels) { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + + if (enc->outputFormat == COLUMNS) { + pc->transpose = 1; + } + } else { + goto INVALID; + } + return; + } else if (PyDict_Check(obj)) { + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = obj; + Py_INCREF(obj); + + return; + } else if (PyList_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = List_iterBegin; + pc->iterEnd = List_iterEnd; + pc->iterNext = List_iterNext; + pc->iterGetValue = List_iterGetValue; + pc->iterGetName = List_iterGetName; + return; + } else if (PyTuple_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = Tuple_iterBegin; + pc->iterEnd = Tuple_iterEnd; + pc->iterNext = Tuple_iterNext; + pc->iterGetValue = Tuple_iterGetValue; + pc->iterGetName = Tuple_iterGetName; + return; + } else if (PyAnySet_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = Set_iterBegin; + pc->iterEnd = Set_iterEnd; + pc->iterNext = Set_iterNext; + pc->iterGetValue = Set_iterGetValue; + pc->iterGetName = Set_iterGetName; + return; + } + + toDictFunc = PyObject_GetAttrString(obj, "toDict"); + + if (toDictFunc) { + PyObject *tuple = PyTuple_New(0); + PyObject *toDictResult = PyObject_Call(toDictFunc, tuple, NULL); + Py_DECREF(tuple); + Py_DECREF(toDictFunc); + + if (toDictResult == NULL) { + PyErr_Clear(); + tc->type = JT_NULL; + return; + } + + if (!PyDict_Check(toDictResult)) { + Py_DECREF(toDictResult); + tc->type = JT_NULL; + return; + } + + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = toDictResult; + return; + } + + PyErr_Clear(); + + if (enc->defaultHandler) { + Object_invokeDefaultHandler(obj, enc); + goto INVALID; + } + + tc->type = JT_OBJECT; + pc->iterBegin = Dir_iterBegin; + pc->iterEnd = Dir_iterEnd; + pc->iterNext = Dir_iterNext; + pc->iterGetValue = Dir_iterGetValue; + pc->iterGetName = Dir_iterGetName; + return; + +INVALID: + tc->type = JT_INVALID; + PyObject_Free(tc->prv); + tc->prv = NULL; + return; +} + +void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (tc->prv) { + Py_XDECREF(GET_TC(tc)->newObj); + GET_TC(tc)->newObj = NULL; + NpyArr_freeLabels(GET_TC(tc)->rowLabels, GET_TC(tc)->rowLabelsLen); + GET_TC(tc)->rowLabels = NULL; + NpyArr_freeLabels(GET_TC(tc)->columnLabels, + GET_TC(tc)->columnLabelsLen); + GET_TC(tc)->columnLabels = NULL; + PyObject_Free(GET_TC(tc)->cStr); + GET_TC(tc)->cStr = NULL; + PyObject_Free(tc->prv); + tc->prv = NULL; + } +} + +const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen) { + return GET_TC(tc)->PyTypeToUTF8(obj, tc, _outLen); +} + +JSINT64 Object_getLongValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return 
GET_TC(tc)->longValue; +} + +double Object_getDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->doubleValue; +} + +const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen) { + PyObject *repr = PyObject_Str(obj); + const char *str = PyUnicode_AsUTF8AndSize(repr, (Py_ssize_t *)_outLen); + char *bytes = PyObject_Malloc(*_outLen + 1); + memcpy(bytes, str, *_outLen + 1); + GET_TC(tc)->cStr = bytes; + + Py_DECREF(repr); + + return GET_TC(tc)->cStr; +} + +static void Object_releaseObject(JSOBJ _obj) { Py_DECREF((PyObject *)_obj); } + +void Object_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->iterBegin(obj, tc); +} + +int Object_iterNext(JSOBJ obj, JSONTypeContext *tc) { + return GET_TC(tc)->iterNext(obj, tc); +} + +void Object_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->iterEnd(obj, tc); +} + +JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { + return GET_TC(tc)->iterGetValue(obj, tc); +} + +char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { + return GET_TC(tc)->iterGetName(obj, tc, outLen); +} + +PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, + PyObject *kwargs) { + PyDateTime_IMPORT; + if (PyDateTimeAPI == NULL) { + return NULL; + } + + PandasDateTime_IMPORT; + if (PandasDateTimeAPI == NULL) { + return NULL; + } + + static char *kwlist[] = {"obj", + "ensure_ascii", + "double_precision", + "encode_html_chars", + "orient", + "date_unit", + "iso_dates", + "default_handler", + "indent", + NULL}; + + char buffer[65536]; + char *ret; + PyObject *newobj; + PyObject *oinput = NULL; + PyObject *oensureAscii = NULL; + int idoublePrecision = 10; // default double precision setting + PyObject *oencodeHTMLChars = NULL; + char *sOrient = NULL; + char *sdateFormat = NULL; + PyObject *oisoDates = 0; + PyObject *odefHandler = 0; + int indent = 0; + + PyObjectEncoder pyEncoder = {{ + Object_beginTypeContext, + Object_endTypeContext, + Object_getStringValue, + Object_getLongValue, + NULL, // getIntValue is unused + Object_getDoubleValue, + Object_getBigNumStringValue, + Object_iterBegin, + Object_iterNext, + Object_iterEnd, + Object_iterGetValue, + Object_iterGetName, + Object_releaseObject, + PyObject_Malloc, + PyObject_Realloc, + PyObject_Free, + -1, // recursionMax + idoublePrecision, + 1, // forceAscii + 0, // encodeHTMLChars + 0, // indent + }}; + JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder; + + pyEncoder.npyCtxtPassthru = NULL; + pyEncoder.blkCtxtPassthru = NULL; + pyEncoder.npyType = -1; + pyEncoder.npyValue = NULL; + pyEncoder.datetimeIso = 0; + pyEncoder.datetimeUnit = NPY_FR_ms; + pyEncoder.outputFormat = COLUMNS; + pyEncoder.defaultHandler = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist, + &oinput, &oensureAscii, &idoublePrecision, + &oencodeHTMLChars, &sOrient, &sdateFormat, + &oisoDates, &odefHandler, &indent)) { + return NULL; + } + + if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) { + encoder->forceASCII = 0; + } + + if (oencodeHTMLChars != NULL && PyObject_IsTrue(oencodeHTMLChars)) { + encoder->encodeHTMLChars = 1; + } + + if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0) { + PyErr_Format( + PyExc_ValueError, + "Invalid value '%d' for option 'double_precision', max is '%u'", + idoublePrecision, JSON_DOUBLE_MAX_DECIMALS); + return NULL; + } + encoder->doublePrecision = idoublePrecision; + + if (sOrient != NULL) { + if (strcmp(sOrient, "records") == 0) { + pyEncoder.outputFormat = 
RECORDS; + } else if (strcmp(sOrient, "index") == 0) { + pyEncoder.outputFormat = INDEX; + } else if (strcmp(sOrient, "split") == 0) { + pyEncoder.outputFormat = SPLIT; + } else if (strcmp(sOrient, "values") == 0) { + pyEncoder.outputFormat = VALUES; + } else if (strcmp(sOrient, "columns") != 0) { + PyErr_Format(PyExc_ValueError, + "Invalid value '%s' for option 'orient'", sOrient); + return NULL; + } + } + + if (sdateFormat != NULL) { + if (strcmp(sdateFormat, "s") == 0) { + pyEncoder.datetimeUnit = NPY_FR_s; + } else if (strcmp(sdateFormat, "ms") == 0) { + pyEncoder.datetimeUnit = NPY_FR_ms; + } else if (strcmp(sdateFormat, "us") == 0) { + pyEncoder.datetimeUnit = NPY_FR_us; + } else if (strcmp(sdateFormat, "ns") == 0) { + pyEncoder.datetimeUnit = NPY_FR_ns; + } else { + PyErr_Format(PyExc_ValueError, + "Invalid value '%s' for option 'date_unit'", + sdateFormat); + return NULL; + } + } + + if (oisoDates != NULL && PyObject_IsTrue(oisoDates)) { + pyEncoder.datetimeIso = 1; + } + + if (odefHandler != NULL && odefHandler != Py_None) { + if (!PyCallable_Check(odefHandler)) { + PyErr_SetString(PyExc_TypeError, "Default handler is not callable"); + return NULL; + } + pyEncoder.defaultHandler = odefHandler; + } + + encoder->indent = indent; + + pyEncoder.originalOutputFormat = pyEncoder.outputFormat; + ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer)); + if (PyErr_Occurred()) { + return NULL; + } + + if (encoder->errorMsg) { + if (ret != buffer) { + encoder->free(ret); + } + PyErr_Format(PyExc_OverflowError, "%s", encoder->errorMsg); + return NULL; + } + + newobj = PyUnicode_FromString(ret); + + if (ret != buffer) { + encoder->free(ret); + } + + return newobj; +} diff --git a/pandas/_libs/src/vendored/ujson/python/ujson.c b/pandas/_libs/src/vendored/ujson/python/ujson.c new file mode 100644 index 0000000000000..15ea4b056b02d --- /dev/null +++ b/pandas/_libs/src/vendored/ujson/python/ujson.c @@ -0,0 +1,451 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from TCL library +https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#include "pandas/vendored/ujson/python/version.h" +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#include "numpy/arrayobject.h" + +/* objToJSON */ +PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs); +void *initObjToJSON(void); + +/* JSONToObj */ +PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs); + +#define ENCODER_HELP_TEXT \ + "Use ensure_ascii=false to output UTF-8. Pass in double_precision to " \ + "alter the maximum digit precision of doubles. Set " \ + "encode_html_chars=True to encode < > & as unicode escape sequences." + +static PyMethodDef ujsonMethods[] = { + {"encode", (PyCFunction)objToJSON, METH_VARARGS | METH_KEYWORDS, + "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT}, + {"decode", (PyCFunction)JSONToObj, METH_VARARGS | METH_KEYWORDS, + "Converts JSON as string to dict object structure. Use precise_float=True " + "to use high precision float decoder."}, + {"dumps", (PyCFunction)objToJSON, METH_VARARGS | METH_KEYWORDS, + "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT}, + {"loads", (PyCFunction)JSONToObj, METH_VARARGS | METH_KEYWORDS, + "Converts JSON as string to dict object structure. Use precise_float=True " + "to use high precision float decoder."}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +typedef struct { + PyObject *type_decimal; + PyObject *type_dataframe; + PyObject *type_series; + PyObject *type_index; + PyObject *type_nat; + PyObject *type_na; +} modulestate; + +#define modulestate(o) ((modulestate *)PyModule_GetState(o)) + +static int module_traverse(PyObject *m, visitproc visit, void *arg); +static int module_clear(PyObject *m); +static void module_free(void *module); + +static struct PyModuleDef moduledef = {.m_base = PyModuleDef_HEAD_INIT, + .m_name = "pandas._libs.json", + .m_methods = ujsonMethods, + .m_size = sizeof(modulestate), + .m_traverse = module_traverse, + .m_clear = module_clear, + .m_free = module_free}; + +#ifndef PYPY_VERSION +/* Used in objToJSON.c */ +int object_is_decimal_type(PyObject *obj) { + PyObject *module = PyState_FindModule(&moduledef); + if (module == NULL) + return 0; + modulestate *state = modulestate(module); + if (state == NULL) + return 0; + PyObject *type_decimal = state->type_decimal; + if (type_decimal == NULL) { + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_decimal); + if (result == -1) { + PyErr_Clear(); + return 0; + } + return result; +} + +int object_is_dataframe_type(PyObject *obj) { + PyObject *module = PyState_FindModule(&moduledef); + if (module == NULL) + return 0; + modulestate *state = modulestate(module); + if (state == NULL) + return 0; + PyObject *type_dataframe = state->type_dataframe; + if (type_dataframe == NULL) { + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_dataframe); + if (result == -1) { + PyErr_Clear(); + return 0; + } + return result; +} + +int object_is_series_type(PyObject *obj) { + PyObject *module = PyState_FindModule(&moduledef); + if (module == NULL) + return 0;
+ modulestate *state = modulestate(module); + if (state == NULL) + return 0; + PyObject *type_series = state->type_series; + if (type_series == NULL) { + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_series); + if (result == -1) { + PyErr_Clear(); + return 0; + } + return result; +} + +int object_is_index_type(PyObject *obj) { + PyObject *module = PyState_FindModule(&moduledef); + if (module == NULL) + return 0; + modulestate *state = modulestate(module); + if (state == NULL) + return 0; + PyObject *type_index = state->type_index; + if (type_index == NULL) { + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_index); + if (result == -1) { + PyErr_Clear(); + return 0; + } + return result; +} + +int object_is_nat_type(PyObject *obj) { + PyObject *module = PyState_FindModule(&moduledef); + if (module == NULL) + return 0; + modulestate *state = modulestate(module); + if (state == NULL) + return 0; + PyObject *type_nat = state->type_nat; + if (type_nat == NULL) { + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_nat); + if (result == -1) { + PyErr_Clear(); + return 0; + } + return result; +} + +int object_is_na_type(PyObject *obj) { + PyObject *module = PyState_FindModule(&moduledef); + if (module == NULL) + return 0; + modulestate *state = modulestate(module); + if (state == NULL) + return 0; + PyObject *type_na = state->type_na; + if (type_na == NULL) { + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_na); + if (result == -1) { + PyErr_Clear(); + return 0; + } + return result; +} +#else + /* Used in objToJSON.c */ +int object_is_decimal_type(PyObject *obj) { + PyObject *module = PyImport_ImportModule("decimal"); + if (module == NULL) { + PyErr_Clear(); + return 0; + } + PyObject *type_decimal = PyObject_GetAttrString(module, "Decimal"); + if (type_decimal == NULL) { + Py_DECREF(module); + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_decimal); + if (result == -1) { + Py_DECREF(module); + Py_DECREF(type_decimal); + PyErr_Clear(); + return 0; + } + return result; +} + +int object_is_dataframe_type(PyObject *obj) { + PyObject *module = PyImport_ImportModule("pandas"); + if (module == NULL) { + PyErr_Clear(); + return 0; + } + PyObject *type_dataframe = PyObject_GetAttrString(module, "DataFrame"); + if (type_dataframe == NULL) { + Py_DECREF(module); + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_dataframe); + if (result == -1) { + Py_DECREF(module); + Py_DECREF(type_dataframe); + PyErr_Clear(); + return 0; + } + return result; +} + +int object_is_series_type(PyObject *obj) { + PyObject *module = PyImport_ImportModule("pandas"); + if (module == NULL) { + PyErr_Clear(); + return 0; + } + PyObject *type_series = PyObject_GetAttrString(module, "Series"); + if (type_series == NULL) { + Py_DECREF(module); + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_series); + if (result == -1) { + Py_DECREF(module); + Py_DECREF(type_series); + PyErr_Clear(); + return 0; + } + return result; +} + +int object_is_index_type(PyObject *obj) { + PyObject *module = PyImport_ImportModule("pandas"); + if (module == NULL) { + PyErr_Clear(); + return 0; + } + PyObject *type_index = PyObject_GetAttrString(module, "Index"); + if (type_index == NULL) { + Py_DECREF(module); + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_index); + if (result == -1) { + Py_DECREF(module); + Py_DECREF(type_index); + 
PyErr_Clear(); + return 0; + } + return result; +} + +int object_is_nat_type(PyObject *obj) { + PyObject *module = PyImport_ImportModule("pandas._libs.tslibs.nattype"); + if (module == NULL) { + PyErr_Clear(); + return 0; + } + PyObject *type_nat = PyObject_GetAttrString(module, "NaTType"); + if (type_nat == NULL) { + Py_DECREF(module); + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_nat); + if (result == -1) { + Py_DECREF(module); + Py_DECREF(type_nat); + PyErr_Clear(); + return 0; + } + return result; +} + +int object_is_na_type(PyObject *obj) { + PyObject *module = PyImport_ImportModule("pandas._libs.missing"); + if (module == NULL) { + PyErr_Clear(); + return 0; + } + PyObject *type_na = PyObject_GetAttrString(module, "NAType"); + if (type_na == NULL) { + Py_DECREF(module); + PyErr_Clear(); + return 0; + } + int result = PyObject_IsInstance(obj, type_na); + if (result == -1) { + Py_DECREF(module); + Py_DECREF(type_na); + PyErr_Clear(); + return 0; + } + return result; +} + +#endif + +static int module_traverse(PyObject *m, visitproc visit, void *arg) { + Py_VISIT(modulestate(m)->type_decimal); + Py_VISIT(modulestate(m)->type_dataframe); + Py_VISIT(modulestate(m)->type_series); + Py_VISIT(modulestate(m)->type_index); + Py_VISIT(modulestate(m)->type_nat); + Py_VISIT(modulestate(m)->type_na); + return 0; +} + +static int module_clear(PyObject *m) { + Py_CLEAR(modulestate(m)->type_decimal); + Py_CLEAR(modulestate(m)->type_dataframe); + Py_CLEAR(modulestate(m)->type_series); + Py_CLEAR(modulestate(m)->type_index); + Py_CLEAR(modulestate(m)->type_nat); + Py_CLEAR(modulestate(m)->type_na); + return 0; +} + +static void module_free(void *module) { module_clear((PyObject *)module); } + +PyMODINIT_FUNC PyInit_json(void) { + import_array() + PyObject *module; + +#ifndef PYPY_VERSION + // This function is not supported in PyPy. 
+ if ((module = PyState_FindModule(&moduledef)) != NULL) { + Py_INCREF(module); + return module; + } +#endif + + module = PyModule_Create(&moduledef); + if (module == NULL) { + return NULL; + } + +#ifndef PYPY_VERSION + PyObject *mod_decimal = PyImport_ImportModule("decimal"); + if (mod_decimal) { + PyObject *type_decimal = PyObject_GetAttrString(mod_decimal, "Decimal"); + assert(type_decimal != NULL); + modulestate(module)->type_decimal = type_decimal; + Py_DECREF(mod_decimal); + } + + PyObject *mod_pandas = PyImport_ImportModule("pandas"); + if (mod_pandas) { + PyObject *type_dataframe = + PyObject_GetAttrString(mod_pandas, "DataFrame"); + assert(type_dataframe != NULL); + modulestate(module)->type_dataframe = type_dataframe; + + PyObject *type_series = PyObject_GetAttrString(mod_pandas, "Series"); + assert(type_series != NULL); + modulestate(module)->type_series = type_series; + + PyObject *type_index = PyObject_GetAttrString(mod_pandas, "Index"); + assert(type_index != NULL); + modulestate(module)->type_index = type_index; + + Py_DECREF(mod_pandas); + } + + PyObject *mod_nattype = + PyImport_ImportModule("pandas._libs.tslibs.nattype"); + if (mod_nattype) { + PyObject *type_nat = PyObject_GetAttrString(mod_nattype, "NaTType"); + assert(type_nat != NULL); + modulestate(module)->type_nat = type_nat; + + Py_DECREF(mod_nattype); + } + + PyObject *mod_natype = PyImport_ImportModule("pandas._libs.missing"); + if (mod_natype) { + PyObject *type_na = PyObject_GetAttrString(mod_natype, "NAType"); + assert(type_na != NULL); + modulestate(module)->type_na = type_na; + + Py_DECREF(mod_natype); + } else { + PyErr_Clear(); + } +#endif + + /* Not vendored for now + JSONDecodeError = PyErr_NewException("ujson.JSONDecodeError", + PyExc_ValueError, NULL); Py_XINCREF(JSONDecodeError); if + (PyModule_AddObject(module, "JSONDecodeError", JSONDecodeError) < 0) + { + Py_XDECREF(JSONDecodeError); + Py_CLEAR(JSONDecodeError); + Py_DECREF(module); + return NULL; + } + */ + + return module; +} From 99bdf43a1def377580857b4f176272dcd7ece266 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 22 May 2023 16:48:44 -0700 Subject: [PATCH 4/4] Missing graft --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index 0846cc3690c47..781a72fdb5481 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -56,4 +56,5 @@ global-exclude *.pxi prune pandas/tests/io/parser/data # Selectively re-add *.cxx files that were excluded above +graft pandas/_libs/src graft pandas/_libs/include