From a8d15bd4c6dae2c65ed4aa4b2f76c029b6358113 Mon Sep 17 00:00:00 2001 From: Kieran O'Mahony Date: Tue, 18 Jun 2013 14:59:48 +0100 Subject: [PATCH 1/2] ENH: update bundled ujson to latest v1.33 --- pandas/io/tests/test_json/test_pandas.py | 6 +- pandas/io/tests/test_json/test_ujson.py | 253 ++- pandas/src/ujson/lib/ultrajson.h | 287 +-- pandas/src/ujson/lib/ultrajsondec.c | 1384 +++++++------- pandas/src/ujson/lib/ultrajsonenc.c | 1368 ++++++------- pandas/src/ujson/python/JSONtoObj.c | 1040 +++++----- pandas/src/ujson/python/objToJSON.c | 2217 ++++++++++++---------- pandas/src/ujson/python/py_defines.h | 37 + pandas/src/ujson/python/ujson.c | 93 +- pandas/src/ujson/python/version.h | 39 +- 10 files changed, 3687 insertions(+), 3037 deletions(-) diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index fe717f56e6bea..997229487e1b9 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -179,21 +179,21 @@ def test_frame_from_json_bad_data(self): # too few indices json = StringIO('{"columns":["A","B"],' '"index":["2","3"],' - '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}"') + '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}') self.assertRaises(ValueError, read_json, json, orient="split") # too many columns json = StringIO('{"columns":["A","B","C"],' '"index":["1","2","3"],' - '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}"') + '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}') self.assertRaises(AssertionError, read_json, json, orient="split") # bad key json = StringIO('{"badkey":["A","B"],' '"index":["2","3"],' - '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}"') + '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}') self.assertRaises(TypeError, read_json, json, orient="split") diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 2e775b4a541ea..23bd41d245f75 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -15,6 +15,8 @@ import calendar import StringIO import re +import random +import decimal from functools import partial import pandas.util.py3compat as py3compat @@ -36,6 +38,72 @@ def _skip_if_python_ver(skip_major, skip_minor=None): else partial(json.dumps, encoding="utf-8")) class UltraJSONTests(TestCase): + + def test_encodeDecimal(self): + sut = decimal.Decimal("1337.1337") + encoded = ujson.encode(sut, double_precision=100) + decoded = ujson.decode(encoded) + self.assertEquals(decoded, 1337.1337) + + def test_encodeStringConversion(self): + input = "A string \\ / \b \f \n \r \t &" + not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"' + html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t \\u003c\\/script\\u003e \\u0026"' + + def helper(expected_output, **encode_kwargs): + output = ujson.encode(input, **encode_kwargs) + self.assertEquals(input, json.loads(output)) + self.assertEquals(output, expected_output) + self.assertEquals(input, ujson.decode(output)) + + # Default behavior assumes encode_html_chars=False. + helper(not_html_encoded, ensure_ascii=True) + helper(not_html_encoded, ensure_ascii=False) + + # Make sure explicit encode_html_chars=False works. + helper(not_html_encoded, ensure_ascii=True, encode_html_chars=False) + helper(not_html_encoded, ensure_ascii=False, encode_html_chars=False) + + # Make sure explicit encode_html_chars=True does the encoding. + helper(html_encoded, ensure_ascii=True, encode_html_chars=True) + helper(html_encoded, ensure_ascii=False, encode_html_chars=True) + + def test_doubleLongIssue(self): + sut = {u'a': -4342969734183514} + encoded = json.dumps(sut) + decoded = json.loads(encoded) + self.assertEqual(sut, decoded) + encoded = ujson.encode(sut, double_precision=100) + decoded = ujson.decode(encoded) + self.assertEqual(sut, decoded) + + def test_doubleLongDecimalIssue(self): + sut = {u'a': -12345678901234.56789012} + encoded = json.dumps(sut) + decoded = json.loads(encoded) + self.assertEqual(sut, decoded) + encoded = ujson.encode(sut, double_precision=100) + decoded = ujson.decode(encoded) + self.assertEqual(sut, decoded) + + + def test_encodeDecodeLongDecimal(self): + sut = {u'a': -528656961.4399388} + encoded = ujson.dumps(sut, double_precision=15) + ujson.decode(encoded) + + def test_decimalDecodeTest(self): + sut = {u'a': 4.56} + encoded = ujson.encode(sut) + decoded = ujson.decode(encoded) + self.assertNotEqual(sut, decoded) + + def test_decimalDecodeTestPrecise(self): + sut = {u'a': 4.56} + encoded = ujson.encode(sut) + decoded = ujson.decode(encoded, precise_float=True) + self.assertEqual(sut, decoded) + def test_encodeDictWithUnicodeKeys(self): input = { u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1" } output = ujson.encode(input) @@ -59,6 +127,7 @@ def test_encodeWithDecimal(self): def test_encodeDoubleNegConversion(self): input = -math.pi output = ujson.encode(input) + self.assertEquals(round(input, 5), round(json.loads(output), 5)) self.assertEquals(round(input, 5), round(ujson.decode(output), 5)) @@ -93,10 +162,6 @@ def test_doublePrecisionTest(self): self.assertEquals(round(input, 3), json.loads(output)) self.assertEquals(round(input, 3), ujson.decode(output)) - output = ujson.encode(input) - self.assertEquals(round(input, 5), json.loads(output)) - self.assertEquals(round(input, 5), ujson.decode(output)) - def test_invalidDoublePrecision(self): input = 30.12345678901234567890 output = ujson.encode(input, double_precision = 20) @@ -373,6 +438,15 @@ def test_decodeBrokenArrayEnd(self): return assert False, "Wrong exception" + def test_decodeArrayDepthTooBig(self): + input = '[' * (1024 * 1024) + try: + ujson.decode(input) + assert False, "Expected exception!" + except(ValueError): + return + assert False, "Wrong exception" + def test_decodeBrokenObjectEnd(self): input = "}" try: @@ -382,6 +456,15 @@ def test_decodeBrokenObjectEnd(self): return assert False, "Wrong exception" + def test_decodeObjectDepthTooBig(self): + input = '{' * (1024 * 1024) + try: + ujson.decode(input) + assert False, "Expected exception!" + except(ValueError): + return + assert False, "Wrong exception" + def test_decodeStringUnterminated(self): input = "\"TESTING" try: @@ -567,7 +650,7 @@ def test_numericIntFrcExp(self): self.assertAlmostEqual(output, json.loads(input)) def test_decodeNumericIntExpEPLUS(self): - input = "1337E+40" + input = "1337E+9" output = ujson.decode(input) self.assertAlmostEqual(output, json.loads(input)) @@ -1192,7 +1275,165 @@ def test_datetimeindex(self): decoded = Series(ujson.decode(ujson.encode(ts))) idx_values = decoded.index.values.astype(np.int64) decoded.index = DatetimeIndex(idx_values) - tm.assert_series_equal(np.round(ts, 5), decoded) + tm.assert_series_equal(ts, decoded) + + def test_decodeArrayTrailingCommaFail(self): + input = "[31337,]" + try: + ujson.decode(input) + except ValueError: + pass + else: + assert False, "expected ValueError" + + def test_decodeArrayLeadingCommaFail(self): + input = "[,31337]" + try: + ujson.decode(input) + except ValueError: + pass + else: + assert False, "expected ValueError" + + def test_decodeArrayOnlyCommaFail(self): + input = "[,]" + try: + ujson.decode(input) + except ValueError: + pass + else: + assert False, "expected ValueError" + + def test_decodeArrayUnmatchedBracketFail(self): + input = "[]]" + try: + ujson.decode(input) + except ValueError: + pass + else: + assert False, "expected ValueError" + + def test_decodeArrayEmpty(self): + input = "[]" + ujson.decode(input) + + def test_decodeArrayOneItem(self): + input = "[31337]" + ujson.decode(input) + + def test_decodeBigValue(self): + input = "9223372036854775807" + ujson.decode(input) + + def test_decodeSmallValue(self): + input = "-9223372036854775808" + ujson.decode(input) + + def test_decodeTooBigValue(self): + try: + input = "9223372036854775808" + ujson.decode(input) + except ValueError, e: + pass + else: + assert False, "expected ValueError" + + def test_decodeTooSmallValue(self): + try: + input = "-90223372036854775809" + ujson.decode(input) + except ValueError,e: + pass + else: + assert False, "expected ValueError" + + def test_decodeVeryTooBigValue(self): + try: + input = "9223372036854775808" + ujson.decode(input) + except ValueError: + pass + else: + assert False, "expected ValueError" + + def test_decodeVeryTooSmallValue(self): + try: + input = "-90223372036854775809" + ujson.decode(input) + except ValueError: + pass + else: + assert False, "expected ValueError" + + def test_decodeWithTrailingWhitespaces(self): + input = "{}\n\t " + ujson.decode(input) + + def test_decodeWithTrailingNonWhitespaces(self): + try: + input = "{}\n\t a" + ujson.decode(input) + except ValueError: + pass + else: + assert False, "expected ValueError" + + def test_decodeArrayWithBigInt(self): + try: + ujson.loads('[18446098363113800555]') + except ValueError: + pass + else: + assert False, "expected ValueError" + + def test_decodeArrayFaultyUnicode(self): + try: + ujson.loads('[18446098363113800555]') + except ValueError: + pass + else: + assert False, "expected ValueError" + + + def test_decodeFloatingPointAdditionalTests(self): + self.assertEquals(-1.1234567893, ujson.loads("-1.1234567893")) + self.assertEquals(-1.234567893, ujson.loads("-1.234567893")) + self.assertEquals(-1.34567893, ujson.loads("-1.34567893")) + self.assertEquals(-1.4567893, ujson.loads("-1.4567893")) + self.assertEquals(-1.567893, ujson.loads("-1.567893")) + self.assertEquals(-1.67893, ujson.loads("-1.67893")) + self.assertEquals(-1.7893, ujson.loads("-1.7893")) + self.assertEquals(-1.893, ujson.loads("-1.893")) + self.assertEquals(-1.3, ujson.loads("-1.3")) + + self.assertEquals(1.1234567893, ujson.loads("1.1234567893")) + self.assertEquals(1.234567893, ujson.loads("1.234567893")) + self.assertEquals(1.34567893, ujson.loads("1.34567893")) + self.assertEquals(1.4567893, ujson.loads("1.4567893")) + self.assertEquals(1.567893, ujson.loads("1.567893")) + self.assertEquals(1.67893, ujson.loads("1.67893")) + self.assertEquals(1.7893, ujson.loads("1.7893")) + self.assertEquals(1.893, ujson.loads("1.893")) + self.assertEquals(1.3, ujson.loads("1.3")) + + def test_encodeBigSet(self): + s = set() + for x in xrange(0, 100000): + s.add(x) + ujson.encode(s) + + def test_encodeEmptySet(self): + s = set() + self.assertEquals("[]", ujson.encode(s)) + + def test_encodeSet(self): + s = set([1,2,3,4,5,6,7,8,9]) + enc = ujson.encode(s) + dec = ujson.decode(enc) + + for v in dec: + self.assertTrue(v in s) + """ def test_decodeNumericIntFrcOverflow(self): diff --git a/pandas/src/ujson/lib/ultrajson.h b/pandas/src/ujson/lib/ultrajson.h index eae665f00f03e..4d7af3dde1f02 100644 --- a/pandas/src/ujson/lib/ultrajson.h +++ b/pandas/src/ujson/lib/ultrajson.h @@ -1,37 +1,38 @@ /* -Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. -3. All advertising materials mentioning features or use of this software - must display the following acknowledgement: - This product includes software developed by ESN Social Software AB (www.esn.me). -4. Neither the name of the ESN Social Software AB nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -Portions of code from: -MODP_ASCII - Ascii transformations (upper/lower, etc) + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) http://code.google.com/p/stringencoders/ Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. */ /* @@ -54,8 +55,6 @@ tree doesn't have cyclic references. #include #include -//#define JSON_DECODE_NUMERIC_AS_DOUBLE - // Don't output any extra whitespaces when encoding #define JSON_NO_EXTRA_WHITESPACE @@ -69,6 +68,11 @@ tree doesn't have cyclic references. #define JSON_MAX_RECURSION_DEPTH 1024 #endif +// Max recursion depth, default for decoder +#ifndef JSON_MAX_OBJECT_DEPTH +#define JSON_MAX_OBJECT_DEPTH 1024 +#endif + /* Dictates and limits how much stack space for buffers UltraJSON will use before resorting to provided heap functions */ #ifndef JSON_MAX_STACK_BUFFER_SIZE @@ -95,26 +99,34 @@ typedef __int64 JSLONG; #else -#include +#include typedef int64_t JSINT64; -typedef u_int64_t JSUINT64; +typedef uint64_t JSUINT64; typedef int32_t JSINT32; -typedef u_int32_t JSUINT32; +typedef uint32_t JSUINT32; #define FASTCALL_MSVC + +#if !defined __x86_64__ #define FASTCALL_ATTR __attribute__((fastcall)) +#else +#define FASTCALL_ATTR +#endif + #define INLINE_PREFIX inline -typedef u_int8_t JSUINT8; -typedef u_int16_t JSUTF16; -typedef u_int32_t JSUTF32; +typedef uint8_t JSUINT8; +typedef uint16_t JSUTF16; +typedef uint32_t JSUTF32; typedef int64_t JSLONG; #define EXPORTFUNCTION #endif +#if !(defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__)) + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __LITTLE_ENDIAN__ #else @@ -125,22 +137,24 @@ typedef int64_t JSLONG; #endif +#endif + #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) #error "Endianess not supported" #endif enum JSTYPES { - JT_NULL, // NULL - JT_TRUE, //boolean true - JT_FALSE, //boolean false - JT_INT, //(JSINT32 (signed 32-bit)) - JT_LONG, //(JSINT64 (signed 64-bit)) - JT_DOUBLE, //(double) - JT_UTF8, //(char 8-bit) - JT_ARRAY, // Array structure - JT_OBJECT, // Key/Value structure - JT_INVALID, // Internal, do not return nor expect + JT_NULL, // NULL + JT_TRUE, //boolean true + JT_FALSE, //boolean false + JT_INT, //(JSINT32 (signed 32-bit)) + JT_LONG, //(JSINT64 (signed 64-bit)) + JT_DOUBLE, //(double) + JT_UTF8, //(char 8-bit) + JT_ARRAY, // Array structure + JT_OBJECT, // Key/Value structure + JT_INVALID, // Internal, do not return nor expect }; typedef void * JSOBJ; @@ -148,9 +162,9 @@ typedef void * JSITER; typedef struct __JSONTypeContext { - int type; - void *encoder; - void *prv; + int type; + void *encoder; + void *prv; } JSONTypeContext; /* @@ -166,79 +180,82 @@ typedef void *(*JSPFN_REALLOC)(void *base, size_t size); typedef struct __JSONObjectEncoder { - void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc); - void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc); - const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen); - JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); - JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc); - double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc); - - /* - Begin iteration of an iteratable object (JS_ARRAY or JS_OBJECT) - Implementor should setup iteration state in ti->prv - */ - JSPFN_ITERBEGIN iterBegin; - - /* - Retrieve next object in an iteration. Should return 0 to indicate iteration has reached end or 1 if there are more items. - Implementor is responsible for keeping state of the iteration. Use ti->prv fields for this - */ - JSPFN_ITERNEXT iterNext; - - /* - Ends the iteration of an iteratable object. - Any iteration state stored in ti->prv can be freed here - */ - JSPFN_ITEREND iterEnd; - - /* - Returns a reference to the value object of an iterator - The is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of current object - */ - JSPFN_ITERGETVALUE iterGetValue; - - /* - Return name of iterator. - The is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of current object - */ - JSPFN_ITERGETNAME iterGetName; - - /* - Release a value as indicated by setting ti->release = 1 in the previous getValue call. - The ti->prv array should contain the necessary context to release the value - */ - void (*releaseObject)(JSOBJ obj); - - /* Library functions - Set to NULL to use STDLIB malloc,realloc,free */ - JSPFN_MALLOC malloc; - JSPFN_REALLOC realloc; - JSPFN_FREE free; - - /* - Configuration for max recursion, set to 0 to use default (see JSON_MAX_RECURSION_DEPTH)*/ - int recursionMax; - - /* - Configuration for max decimals of double floating poiunt numbers to encode (0-9) */ - int doublePrecision; - - /* - If true output will be ASCII with all characters above 127 encoded as \uXXXX. If false output will be UTF-8 or what ever charset strings are brought as */ - int forceASCII; - - - /* - Set to an error message if error occured */ - const char *errorMsg; - JSOBJ errorObj; - - /* Buffer stuff */ - char *start; - char *offset; - char *end; - int heap; - int level; + void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc); + void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc); + const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen); + JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); + JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc); + double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc); + + /* + Begin iteration of an iteratable object (JS_ARRAY or JS_OBJECT) + Implementor should setup iteration state in ti->prv + */ + JSPFN_ITERBEGIN iterBegin; + + /* + Retrieve next object in an iteration. Should return 0 to indicate iteration has reached end or 1 if there are more items. + Implementor is responsible for keeping state of the iteration. Use ti->prv fields for this + */ + JSPFN_ITERNEXT iterNext; + + /* + Ends the iteration of an iteratable object. + Any iteration state stored in ti->prv can be freed here + */ + JSPFN_ITEREND iterEnd; + + /* + Returns a reference to the value object of an iterator + The is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of current object + */ + JSPFN_ITERGETVALUE iterGetValue; + + /* + Return name of iterator. + The is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of current object + */ + JSPFN_ITERGETNAME iterGetName; + + /* + Release a value as indicated by setting ti->release = 1 in the previous getValue call. + The ti->prv array should contain the necessary context to release the value + */ + void (*releaseObject)(JSOBJ obj); + + /* Library functions + Set to NULL to use STDLIB malloc,realloc,free */ + JSPFN_MALLOC malloc; + JSPFN_REALLOC realloc; + JSPFN_FREE free; + + /* + Configuration for max recursion, set to 0 to use default (see JSON_MAX_RECURSION_DEPTH)*/ + int recursionMax; + + /* + Configuration for max decimals of double floating poiunt numbers to encode (0-9) */ + int doublePrecision; + + /* + If true output will be ASCII with all characters above 127 encoded as \uXXXX. If false output will be UTF-8 or what ever charset strings are brought as */ + int forceASCII; + + /* + If true, '<', '>', and '&' characters will be encoded as \u003c, \u003e, and \u0026, respectively. If false, no special encoding will be used. */ + int encodeHTMLChars; + + /* + Set to an error message if error occured */ + const char *errorMsg; + JSOBJ errorObj; + + /* Buffer stuff */ + char *start; + char *offset; + char *end; + int heap; + int level; } JSONObjectEncoder; @@ -268,29 +285,27 @@ EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char * typedef struct __JSONObjectDecoder { - JSOBJ (*newString)(wchar_t *start, wchar_t *end); - int (*objectAddKey)(JSOBJ obj, JSOBJ name, JSOBJ value); - int (*arrayAddItem)(JSOBJ obj, JSOBJ value); - JSOBJ (*newTrue)(void); - JSOBJ (*newFalse)(void); - JSOBJ (*newNull)(void); - JSOBJ (*newObject)(void *decoder); - JSOBJ (*endObject)(JSOBJ obj); - JSOBJ (*newArray)(void *decoder); - JSOBJ (*endArray)(JSOBJ obj); - JSOBJ (*newInt)(JSINT32 value); - JSOBJ (*newLong)(JSINT64 value); - JSOBJ (*newDouble)(double value); - void (*releaseObject)(JSOBJ obj, void *decoder); - JSPFN_MALLOC malloc; - JSPFN_FREE free; - JSPFN_REALLOC realloc; - - char *errorStr; - char *errorOffset; - - - + JSOBJ (*newString)(void *prv, wchar_t *start, wchar_t *end); + int (*objectAddKey)(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value); + int (*arrayAddItem)(void *prv, JSOBJ obj, JSOBJ value); + JSOBJ (*newTrue)(void *prv); + JSOBJ (*newFalse)(void *prv); + JSOBJ (*newNull)(void *prv); + JSOBJ (*newObject)(void *prv, void *decoder); + JSOBJ (*endObject)(void *prv, JSOBJ obj); + JSOBJ (*newArray)(void *prv, void *decoder); + JSOBJ (*endArray)(void *prv, JSOBJ obj); + JSOBJ (*newInt)(void *prv, JSINT32 value); + JSOBJ (*newLong)(void *prv, JSINT64 value); + JSOBJ (*newDouble)(void *prv, double value); + void (*releaseObject)(void *prv, JSOBJ obj, void *decoder); + JSPFN_MALLOC malloc; + JSPFN_FREE free; + JSPFN_REALLOC realloc; + char *errorStr; + char *errorOffset; + int preciseFloat; + void *prv; } JSONObjectDecoder; EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer); diff --git a/pandas/src/ujson/lib/ultrajsondec.c b/pandas/src/ujson/lib/ultrajsondec.c index eda30f3fea839..c5cf341ad3092 100644 --- a/pandas/src/ujson/lib/ultrajsondec.c +++ b/pandas/src/ujson/lib/ultrajsondec.c @@ -1,37 +1,38 @@ /* -Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. -3. All advertising materials mentioning features or use of this software - must display the following acknowledgement: - This product includes software developed by ESN Social Software AB (www.esn.me). -4. Neither the name of the ESN Social Software AB nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -Portions of code from: -MODP_ASCII - Ascii transformations (upper/lower, etc) + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) http://code.google.com/p/stringencoders/ Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. */ #include "ultrajson.h" @@ -40,806 +41,871 @@ Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights rese #include #include #include +#include +#include + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif +#ifndef NULL +#define NULL 0 +#endif struct DecoderState { - char *start; - char *end; - wchar_t *escStart; - wchar_t *escEnd; - int escHeap; - int lastType; - JSONObjectDecoder *dec; + char *start; + char *end; + wchar_t *escStart; + wchar_t *escEnd; + int escHeap; + int lastType; + JSUINT32 objDepth; + void *prv; + JSONObjectDecoder *dec; }; JSOBJ FASTCALL_MSVC decode_any( struct DecoderState *ds) FASTCALL_ATTR; typedef JSOBJ (*PFN_DECODER)( struct DecoderState *ds); -#define RETURN_JSOBJ_NULLCHECK(_expr) return(_expr); -double createDouble(double intNeg, double intValue, double frcValue, int frcDecimalCount) +static JSOBJ SetError( struct DecoderState *ds, int offset, const char *message) { - static const double g_pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000}; - - return (intValue + (frcValue / g_pow10[frcDecimalCount])) * intNeg; + ds->dec->errorOffset = ds->start + offset; + ds->dec->errorStr = (char *) message; + return NULL; } -static JSOBJ SetError( struct DecoderState *ds, int offset, const char *message) +static void ClearError( struct DecoderState *ds) { - ds->dec->errorOffset = ds->start + offset; - ds->dec->errorStr = (char *) message; - return NULL; + ds->dec->errorOffset = 0; + ds->dec->errorStr = NULL; } +double createDouble(double intNeg, double intValue, double frcValue, int frcDecimalCount) +{ + static const double g_pow10[] = {1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001,0.0000001, 0.00000001, 0.000000001, 0.0000000001, 0.00000000001, 0.000000000001, 0.0000000000001, 0.00000000000001, 0.000000000000001}; + return (intValue + (frcValue * g_pow10[frcDecimalCount])) * intNeg; +} -FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric ( struct DecoderState *ds) +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decodePreciseFloat(struct DecoderState *ds) { -#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE - double intNeg = 1; - double intValue; -#else - int intNeg = 1; - JSLONG intValue; -#endif + char *end; + double value; + errno = 0; - double expNeg; - int chr; - int decimalCount = 0; - double frcValue = 0.0; - double expValue; - char *offset = ds->start; + value = strtod(ds->start, &end); - if (*(offset) == '-') - { - offset ++; - intNeg = -1; - } + if (errno == ERANGE) + { + return SetError(ds, -1, "Range error when decoding numeric as double"); + } - // Scan integer part - intValue = 0; + ds->start = end; + return ds->dec->newDouble(ds->prv, value); +} - while (1) - { - chr = (int) (unsigned char) *(offset); +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric (struct DecoderState *ds) +{ + int intNeg = 1; + int mantSize = 0; + JSUINT64 intValue; + int chr; + int decimalCount = 0; + double frcValue = 0.0; + double expNeg; + double expValue; + char *offset = ds->start; + + JSUINT64 overflowLimit = LLONG_MAX; + + if (*(offset) == '-') + { + offset ++; + intNeg = -1; + overflowLimit = LLONG_MIN; + } + + // Scan integer part + intValue = 0; - switch (chr) + while (1) + { + chr = (int) (unsigned char) *(offset); + + switch (chr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + //FIXME: Check for arithemtic overflow here + //PERF: Don't do 64-bit arithmetic here unless we know we have to + intValue = intValue * 10ULL + (JSLONG) (chr - 48); + + if (intValue > overflowLimit) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - //FIXME: Check for arithemtic overflow here - //PERF: Don't do 64-bit arithmetic here unless we know we have to -#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE - intValue = intValue * 10.0 + (double) (chr - 48); -#else - intValue = intValue * 10LL + (JSLONG) (chr - 48); -#endif - offset ++; - break; - - case '.': - offset ++; - goto DECODE_FRACTION; - break; - - case 'e': - case 'E': - offset ++; - goto DECODE_EXPONENT; - break; - - default: - goto BREAK_INT_LOOP; - break; + return SetError(ds, -1, overflowLimit == LLONG_MAX ? "Value is too big" : "Value is too small"); } + + offset ++; + mantSize ++; + break; + } + case '.': + { + offset ++; + goto DECODE_FRACTION; + break; + } + case 'e': + case 'E': + { + offset ++; + goto DECODE_EXPONENT; + break; + } + + default: + { + goto BREAK_INT_LOOP; + break; + } } + } BREAK_INT_LOOP: - ds->lastType = JT_INT; - ds->start = offset; + ds->lastType = JT_INT; + ds->start = offset; - //If input string is LONGLONG_MIN here the value is already negative so we should not flip it - -#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE -#else - if (intValue < 0) - { - intNeg = 1; - } -#endif - - //dbg1 = (intValue * intNeg); - //dbg2 = (JSLONG) dbg1; - -#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE - if (intValue > (double) INT_MAX || intValue < (double) INT_MIN) -#else - if ( (intValue >> 31)) -#endif - { - RETURN_JSOBJ_NULLCHECK(ds->dec->newLong( (JSINT64) (intValue * (JSINT64) intNeg))); - } - else - { - RETURN_JSOBJ_NULLCHECK(ds->dec->newInt( (JSINT32) (intValue * intNeg))); - } + if ((intValue >> 31)) + { + return ds->dec->newLong(ds->prv, (JSINT64) (intValue * (JSINT64) intNeg)); + } + else + { + return ds->dec->newInt(ds->prv, (JSINT32) (intValue * intNeg)); + } +DECODE_FRACTION: + if (ds->dec->preciseFloat) + { + return decodePreciseFloat(ds); + } -DECODE_FRACTION: + // Scan fraction part + frcValue = 0.0; + for (;;) + { + chr = (int) (unsigned char) *(offset); - // Scan fraction part - frcValue = 0.0; - while (1) + switch (chr) { - chr = (int) (unsigned char) *(offset); - - switch (chr) + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + if (decimalCount < JSON_DOUBLE_MAX_DECIMALS) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (decimalCount < JSON_DOUBLE_MAX_DECIMALS) - { - frcValue = frcValue * 10.0 + (double) (chr - 48); - decimalCount ++; - } - offset ++; - break; - - case 'e': - case 'E': - offset ++; - goto DECODE_EXPONENT; - break; - - default: - goto BREAK_FRC_LOOP; + frcValue = frcValue * 10.0 + (double) (chr - 48); + decimalCount ++; } + offset ++; + break; + } + case 'e': + case 'E': + { + offset ++; + goto DECODE_EXPONENT; + break; + } + default: + { + goto BREAK_FRC_LOOP; + } } + } BREAK_FRC_LOOP: - - if (intValue < 0) - { - intNeg = 1; - } - - //FIXME: Check for arithemtic overflow here - ds->lastType = JT_DOUBLE; - ds->start = offset; - RETURN_JSOBJ_NULLCHECK(ds->dec->newDouble (createDouble( (double) intNeg, (double) intValue, frcValue, decimalCount))); + //FIXME: Check for arithemtic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + return ds->dec->newDouble (ds->prv, createDouble( (double) intNeg, (double) intValue, frcValue, decimalCount)); DECODE_EXPONENT: - expNeg = 1.0; + if (ds->dec->preciseFloat) + { + return decodePreciseFloat(ds); + } - if (*(offset) == '-') - { - expNeg = -1.0; - offset ++; - } - else - if (*(offset) == '+') - { - expNeg = +1.0; - offset ++; - } + expNeg = 1.0; - expValue = 0.0; + if (*(offset) == '-') + { + expNeg = -1.0; + offset ++; + } + else + if (*(offset) == '+') + { + expNeg = +1.0; + offset ++; + } - while (1) - { - chr = (int) (unsigned char) *(offset); + expValue = 0.0; - switch (chr) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - expValue = expValue * 10.0 + (double) (chr - 48); - offset ++; - break; - - default: - goto BREAK_EXP_LOOP; + for (;;) + { + chr = (int) (unsigned char) *(offset); - } + switch (chr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + expValue = expValue * 10.0 + (double) (chr - 48); + offset ++; + break; + } + default: + { + goto BREAK_EXP_LOOP; + } } + } BREAK_EXP_LOOP: - -#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE -#else - if (intValue < 0) - { - intNeg = 1; - } -#endif - - //FIXME: Check for arithemtic overflow here - ds->lastType = JT_DOUBLE; - ds->start = offset; - RETURN_JSOBJ_NULLCHECK(ds->dec->newDouble (createDouble( (double) intNeg, (double) intValue , frcValue, decimalCount) * pow(10.0, expValue * expNeg))); + //FIXME: Check for arithemtic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + return ds->dec->newDouble (ds->prv, createDouble( (double) intNeg, (double) intValue , frcValue, decimalCount) * pow(10.0, expValue * expNeg)); } -FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_true ( struct DecoderState *ds) +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_true ( struct DecoderState *ds) { - char *offset = ds->start; - offset ++; + char *offset = ds->start; + offset ++; - if (*(offset++) != 'r') - goto SETERROR; - if (*(offset++) != 'u') - goto SETERROR; - if (*(offset++) != 'e') - goto SETERROR; + if (*(offset++) != 'r') + goto SETERROR; + if (*(offset++) != 'u') + goto SETERROR; + if (*(offset++) != 'e') + goto SETERROR; - ds->lastType = JT_TRUE; - ds->start = offset; - RETURN_JSOBJ_NULLCHECK(ds->dec->newTrue()); + ds->lastType = JT_TRUE; + ds->start = offset; + return ds->dec->newTrue(ds->prv); SETERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'true'"); + return SetError(ds, -1, "Unexpected character found when decoding 'true'"); } -FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_false ( struct DecoderState *ds) +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_false ( struct DecoderState *ds) { - char *offset = ds->start; - offset ++; - - if (*(offset++) != 'a') - goto SETERROR; - if (*(offset++) != 'l') - goto SETERROR; - if (*(offset++) != 's') - goto SETERROR; - if (*(offset++) != 'e') - goto SETERROR; - - ds->lastType = JT_FALSE; - ds->start = offset; - RETURN_JSOBJ_NULLCHECK(ds->dec->newFalse()); + char *offset = ds->start; + offset ++; + + if (*(offset++) != 'a') + goto SETERROR; + if (*(offset++) != 'l') + goto SETERROR; + if (*(offset++) != 's') + goto SETERROR; + if (*(offset++) != 'e') + goto SETERROR; + + ds->lastType = JT_FALSE; + ds->start = offset; + return ds->dec->newFalse(ds->prv); SETERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'false'"); - + return SetError(ds, -1, "Unexpected character found when decoding 'false'"); } - -FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_null ( struct DecoderState *ds) +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_null ( struct DecoderState *ds) { - char *offset = ds->start; - offset ++; + char *offset = ds->start; + offset ++; - if (*(offset++) != 'u') - goto SETERROR; - if (*(offset++) != 'l') - goto SETERROR; - if (*(offset++) != 'l') - goto SETERROR; + if (*(offset++) != 'u') + goto SETERROR; + if (*(offset++) != 'l') + goto SETERROR; + if (*(offset++) != 'l') + goto SETERROR; - ds->lastType = JT_NULL; - ds->start = offset; - RETURN_JSOBJ_NULLCHECK(ds->dec->newNull()); + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); SETERROR: - return SetError(ds, -1, "Unexpected character found when decoding 'null'"); + return SetError(ds, -1, "Unexpected character found when decoding 'null'"); } -FASTCALL_ATTR void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds) +FASTCALL_ATTR void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds) { - char *offset = ds->start; + char *offset; - while (1) + for (offset = ds->start; (ds->end - offset) > 0; offset ++) + { + switch (*offset) { - switch (*offset) - { - case ' ': - case '\t': - case '\r': - case '\n': - offset ++; - break; - - default: - ds->start = offset; - return; - } + case ' ': + case '\t': + case '\r': + case '\n': + break; + + default: + ds->start = offset; + return; } -} + } + if (offset == ds->end) + { + ds->start = ds->end; + } +} enum DECODESTRINGSTATE { - DS_ISNULL = 0x32, - DS_ISQUOTE, - DS_ISESCAPE, - DS_UTFLENERROR, + DS_ISNULL = 0x32, + DS_ISQUOTE, + DS_ISESCAPE, + DS_UTFLENERROR, }; -static const JSUINT8 g_decoderLookup[256] = +static const JSUINT8 g_decoderLookup[256] = { -/* 0x00 */ DS_ISNULL, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0x20 */ 1, 1, DS_ISQUOTE, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, DS_ISESCAPE, 1, 1, 1, -/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, + /* 0x00 */ DS_ISNULL, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0x20 */ 1, 1, DS_ISQUOTE, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, DS_ISESCAPE, 1, 1, 1, + /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + /* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, }; - FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds) { - JSUTF16 sur[2] = { 0 }; - int iSur = 0; - int index; - wchar_t *escOffset; - size_t escLen = (ds->escEnd - ds->escStart); - JSUINT8 *inputOffset; - JSUINT8 oct; - JSUTF32 ucs; - ds->lastType = JT_INVALID; - ds->start ++; - - if ( (ds->end - ds->start) > escLen) + JSUTF16 sur[2] = { 0 }; + int iSur = 0; + int index; + wchar_t *escOffset; + wchar_t *escStart; + size_t escLen = (ds->escEnd - ds->escStart); + JSUINT8 *inputOffset; + JSUINT8 oct; + JSUTF32 ucs; + ds->lastType = JT_INVALID; + ds->start ++; + + if ( (size_t) (ds->end - ds->start) > escLen) + { + size_t newSize = (ds->end - ds->start); + + if (ds->escHeap) { - size_t newSize = (ds->end - ds->start); - - if (ds->escHeap) - { - ds->escStart = (wchar_t *) ds->dec->realloc (ds->escStart, newSize * sizeof(wchar_t)); - if (!ds->escStart) - { - return SetError(ds, -1, "Could not reserve memory block"); - } - } - else - { - wchar_t *oldStart = ds->escStart; - ds->escHeap = 1; - ds->escStart = (wchar_t *) ds->dec->malloc (newSize * sizeof(wchar_t)); - if (!ds->escStart) - { - return SetError(ds, -1, "Could not reserve memory block"); - } - memcpy (ds->escStart, oldStart, escLen * sizeof(wchar_t)); - } - - ds->escEnd = ds->escStart + newSize; + if (newSize > (UINT_MAX / sizeof(wchar_t))) + { + return SetError(ds, -1, "Could not reserve memory block"); + } + escStart = (wchar_t *)ds->dec->realloc(ds->escStart, newSize * sizeof(wchar_t)); + if (!escStart) + { + ds->dec->free(ds->escStart); + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escStart = escStart; + } + else + { + wchar_t *oldStart = ds->escStart; + ds->escHeap = 1; + if (newSize > (UINT_MAX / sizeof(wchar_t))) + { + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escStart = (wchar_t *) ds->dec->malloc(newSize * sizeof(wchar_t)); + if (!ds->escStart) + { + return SetError(ds, -1, "Could not reserve memory block"); + } + memcpy(ds->escStart, oldStart, escLen * sizeof(wchar_t)); } - escOffset = ds->escStart; - inputOffset = ds->start; + ds->escEnd = ds->escStart + newSize; + } - while(1) + escOffset = ds->escStart; + inputOffset = (JSUINT8 *) ds->start; + + for (;;) + { + switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) { - switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) + case DS_ISNULL: + { + return SetError(ds, -1, "Unmatched ''\"' when when decoding 'string'"); + } + case DS_ISQUOTE: + { + ds->lastType = JT_UTF8; + inputOffset ++; + ds->start += ( (char *) inputOffset - (ds->start)); + return ds->dec->newString(ds->prv, ds->escStart, escOffset); + } + case DS_UTFLENERROR: + { + return SetError (ds, -1, "Invalid UTF-8 sequence length when decoding 'string'"); + } + case DS_ISESCAPE: + inputOffset ++; + switch (*inputOffset) { - case DS_ISNULL: - return SetError(ds, -1, "Unmatched ''\"' when when decoding 'string'"); - - case DS_ISQUOTE: - ds->lastType = JT_UTF8; + case '\\': *(escOffset++) = L'\\'; inputOffset++; continue; + case '\"': *(escOffset++) = L'\"'; inputOffset++; continue; + case '/': *(escOffset++) = L'/'; inputOffset++; continue; + case 'b': *(escOffset++) = L'\b'; inputOffset++; continue; + case 'f': *(escOffset++) = L'\f'; inputOffset++; continue; + case 'n': *(escOffset++) = L'\n'; inputOffset++; continue; + case 'r': *(escOffset++) = L'\r'; inputOffset++; continue; + case 't': *(escOffset++) = L'\t'; inputOffset++; continue; + + case 'u': + { + int index; inputOffset ++; - ds->start += ( (char *) inputOffset - (ds->start)); - RETURN_JSOBJ_NULLCHECK(ds->dec->newString(ds->escStart, escOffset)); - - case DS_UTFLENERROR: - return SetError (ds, -1, "Invalid UTF-8 sequence length when decoding 'string'"); - case DS_ISESCAPE: - inputOffset ++; - switch (*inputOffset) + for (index = 0; index < 4; index ++) { - case '\\': *(escOffset++) = L'\\'; inputOffset++; continue; - case '\"': *(escOffset++) = L'\"'; inputOffset++; continue; - case '/': *(escOffset++) = L'/'; inputOffset++; continue; - case 'b': *(escOffset++) = L'\b'; inputOffset++; continue; - case 'f': *(escOffset++) = L'\f'; inputOffset++; continue; - case 'n': *(escOffset++) = L'\n'; inputOffset++; continue; - case 'r': *(escOffset++) = L'\r'; inputOffset++; continue; - case 't': *(escOffset++) = L'\t'; inputOffset++; continue; - - case 'u': - { - int index; - inputOffset ++; - - for (index = 0; index < 4; index ++) - { - switch (*inputOffset) - { - case '\0': return SetError (ds, -1, "Unterminated unicode escape sequence when decoding 'string'"); - default: return SetError (ds, -1, "Unexpected character in unicode escape sequence when decoding 'string'"); - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - sur[iSur] = (sur[iSur] << 4) + (JSUTF16) (*inputOffset - '0'); - break; - - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'a'); - break; - - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'A'); - break; - } - - inputOffset ++; - } - - - if (iSur == 0) - { - if((sur[iSur] & 0xfc00) == 0xd800) - { - // First of a surrogate pair, continue parsing - iSur ++; - break; - } - (*escOffset++) = (wchar_t) sur[iSur]; - iSur = 0; - } - else - { - // Decode pair - if ((sur[1] & 0xfc00) != 0xdc00) - { - return SetError (ds, -1, "Unpaired high surrogate when decoding 'string'"); - } + switch (*inputOffset) + { + case '\0': return SetError (ds, -1, "Unterminated unicode escape sequence when decoding 'string'"); + default: return SetError (ds, -1, "Unexpected character in unicode escape sequence when decoding 'string'"); + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + sur[iSur] = (sur[iSur] << 4) + (JSUTF16) (*inputOffset - '0'); + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'a'); + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'A'); + break; + } + + inputOffset ++; + } + if (iSur == 0) + { + if((sur[iSur] & 0xfc00) == 0xd800) + { + // First of a surrogate pair, continue parsing + iSur ++; + break; + } + (*escOffset++) = (wchar_t) sur[iSur]; + iSur = 0; + } + else + { + // Decode pair + if ((sur[1] & 0xfc00) != 0xdc00) + { + return SetError (ds, -1, "Unpaired high surrogate when decoding 'string'"); + } #if WCHAR_MAX == 0xffff - (*escOffset++) = (wchar_t) sur[0]; - (*escOffset++) = (wchar_t) sur[1]; + (*escOffset++) = (wchar_t) sur[0]; + (*escOffset++) = (wchar_t) sur[1]; #else - (*escOffset++) = (wchar_t) 0x10000 + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); + (*escOffset++) = (wchar_t) 0x10000 + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); #endif - iSur = 0; - } - break; - } - - case '\0': return SetError(ds, -1, "Unterminated escape sequence when decoding 'string'"); - default: return SetError(ds, -1, "Unrecognized escape sequence when decoding 'string'"); + iSur = 0; } - break; - - case 1: - *(escOffset++) = (wchar_t) (*inputOffset++); - break; + break; + } - case 2: + case '\0': return SetError(ds, -1, "Unterminated escape sequence when decoding 'string'"); + default: return SetError(ds, -1, "Unrecognized escape sequence when decoding 'string'"); + } + break; + + case 1: + { + *(escOffset++) = (wchar_t) (*inputOffset++); + break; + } + + case 2: + { + ucs = (*inputOffset++) & 0x1f; + ucs <<= 6; + if (((*inputOffset) & 0x80) != 0x80) { - ucs = (*inputOffset++) & 0x1f; - ucs <<= 6; - if (((*inputOffset) & 0x80) != 0x80) - { - return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); - } - ucs |= (*inputOffset++) & 0x3f; - if (ucs < 0x80) return SetError (ds, -1, "Overlong 2 byte UTF-8 sequence detected when decoding 'string'"); - *(escOffset++) = (wchar_t) ucs; - break; + return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); } - - case 3: + ucs |= (*inputOffset++) & 0x3f; + if (ucs < 0x80) return SetError (ds, -1, "Overlong 2 byte UTF-8 sequence detected when decoding 'string'"); + *(escOffset++) = (wchar_t) ucs; + break; + } + + case 3: + { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x0f; + + for (index = 0; index < 2; index ++) { - JSUTF32 ucs = 0; - ucs |= (*inputOffset++) & 0x0f; + ucs <<= 6; + oct = (*inputOffset++); - for (index = 0; index < 2; index ++) - { - ucs <<= 6; - oct = (*inputOffset++); + if ((oct & 0x80) != 0x80) + { + return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); + } - if ((oct & 0x80) != 0x80) - { - return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); - } + ucs |= oct & 0x3f; + } - ucs |= oct & 0x3f; - } + if (ucs < 0x800) return SetError (ds, -1, "Overlong 3 byte UTF-8 sequence detected when encoding string"); + *(escOffset++) = (wchar_t) ucs; + break; + } - if (ucs < 0x800) return SetError (ds, -1, "Overlong 3 byte UTF-8 sequence detected when encoding string"); - *(escOffset++) = (wchar_t) ucs; - break; - } + case 4: + { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x07; - case 4: + for (index = 0; index < 3; index ++) { - JSUTF32 ucs = 0; - ucs |= (*inputOffset++) & 0x07; - - for (index = 0; index < 3; index ++) - { - ucs <<= 6; - oct = (*inputOffset++); + ucs <<= 6; + oct = (*inputOffset++); - if ((oct & 0x80) != 0x80) - { - return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); - } + if ((oct & 0x80) != 0x80) + { + return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); + } - ucs |= oct & 0x3f; - } + ucs |= oct & 0x3f; + } - if (ucs < 0x10000) return SetError (ds, -1, "Overlong 4 byte UTF-8 sequence detected when decoding 'string'"); + if (ucs < 0x10000) return SetError (ds, -1, "Overlong 4 byte UTF-8 sequence detected when decoding 'string'"); - #if WCHAR_MAX == 0xffff - if (ucs >= 0x10000) - { - ucs -= 0x10000; - *(escOffset++) = (ucs >> 10) + 0xd800; - *(escOffset++) = (ucs & 0x3ff) + 0xdc00; - } - else - { - *(escOffset++) = (wchar_t) ucs; - } - #else - *(escOffset++) = (wchar_t) ucs; - #endif - break; +#if WCHAR_MAX == 0xffff + if (ucs >= 0x10000) + { + ucs -= 0x10000; + *(escOffset++) = (wchar_t) (ucs >> 10) + 0xd800; + *(escOffset++) = (wchar_t) (ucs & 0x3ff) + 0xdc00; } + else + { + *(escOffset++) = (wchar_t) ucs; } +#else + *(escOffset++) = (wchar_t) ucs; +#endif + break; + } } + } } -FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_array( struct DecoderState *ds) +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_array(struct DecoderState *ds) { - JSOBJ itemValue; - JSOBJ newObj = ds->dec->newArray(ds->dec); + JSOBJ itemValue; + JSOBJ newObj; + int len; + ds->objDepth++; + if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { + return SetError(ds, -1, "Reached object decoding depth limit"); + } - ds->lastType = JT_INVALID; - ds->start ++; + newObj = ds->dec->newArray(ds->prv, ds->dec); + len = 0; - while (1)//(*ds->start) != '\0') - { - SkipWhitespace(ds); + ds->lastType = JT_INVALID; + ds->start ++; - if ((*ds->start) == ']') - { - ds->start++; - return ds->dec->endArray(newObj); - } + for (;;) + { + SkipWhitespace(ds); - itemValue = decode_any(ds); + if ((*ds->start) == ']') + { + ds->objDepth--; + if (len == 0) + { + ds->start ++; + return ds->dec->endArray(ds->prv, newObj); + } + + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError(ds, -1, "Unexpected character found when decoding array value (1)"); + } - if (itemValue == NULL) - { - ds->dec->releaseObject(newObj, ds->dec); - return NULL; - } + itemValue = decode_any(ds); - if (!ds->dec->arrayAddItem (newObj, itemValue)) - { - ds->dec->releaseObject(newObj, ds->dec); - return NULL; - } + if (itemValue == NULL) + { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } - SkipWhitespace(ds); + if (!ds->dec->arrayAddItem (ds->prv, newObj, itemValue)) + { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } - switch (*(ds->start++)) - { - case ']': - return ds->dec->endArray(newObj); + SkipWhitespace(ds); - case ',': - break; + switch (*(ds->start++)) + { + case ']': + { + ds->objDepth--; + return ds->dec->endArray(ds->prv, newObj); + } + case ',': + break; - default: - ds->dec->releaseObject(newObj, ds->dec); - return SetError(ds, -1, "Unexpected character in found when decoding array value"); - } + default: + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError(ds, -1, "Unexpected character found when decoding array value (2)"); } - ds->dec->releaseObject(newObj, ds->dec); - return SetError(ds, -1, "Unmatched ']' when decoding 'array'"); + len ++; + } } - - FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_object( struct DecoderState *ds) { - JSOBJ itemName; - JSOBJ itemValue; - JSOBJ newObj = ds->dec->newObject(ds->dec); + JSOBJ itemName; + JSOBJ itemValue; + JSOBJ newObj; - ds->start ++; + ds->objDepth++; + if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { + return SetError(ds, -1, "Reached object decoding depth limit"); + } - while (1) - { - SkipWhitespace(ds); + newObj = ds->dec->newObject(ds->prv, ds->dec); - if ((*ds->start) == '}') - { - ds->start ++; - return ds->dec->endObject(newObj); - } + ds->start ++; - ds->lastType = JT_INVALID; - itemName = decode_any(ds); + for (;;) + { + SkipWhitespace(ds); - if (itemName == NULL) - { - ds->dec->releaseObject(newObj, ds->dec); - return NULL; - } + if ((*ds->start) == '}') + { + ds->objDepth--; + ds->start ++; + return ds->dec->endObject(ds->prv, newObj); + } - if (ds->lastType != JT_UTF8) - { - ds->dec->releaseObject(newObj, ds->dec); - ds->dec->releaseObject(itemName, ds->dec); - return SetError(ds, -1, "Key name of object must be 'string' when decoding 'object'"); - } + ds->lastType = JT_INVALID; + itemName = decode_any(ds); - SkipWhitespace(ds); + if (itemName == NULL) + { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } - if (*(ds->start++) != ':') - { - ds->dec->releaseObject(newObj, ds->dec); - ds->dec->releaseObject(itemName, ds->dec); - return SetError(ds, -1, "No ':' found when decoding object value"); - } + if (ds->lastType != JT_UTF8) + { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return SetError(ds, -1, "Key name of object must be 'string' when decoding 'object'"); + } - SkipWhitespace(ds); + SkipWhitespace(ds); - itemValue = decode_any(ds); + if (*(ds->start++) != ':') + { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return SetError(ds, -1, "No ':' found when decoding object value"); + } - if (itemValue == NULL) - { - ds->dec->releaseObject(newObj, ds->dec); - ds->dec->releaseObject(itemName, ds->dec); - return NULL; - } + SkipWhitespace(ds); - if (!ds->dec->objectAddKey (newObj, itemName, itemValue)) - { - ds->dec->releaseObject(newObj, ds->dec); - ds->dec->releaseObject(itemName, ds->dec); - ds->dec->releaseObject(itemValue, ds->dec); - return NULL; - } + itemValue = decode_any(ds); - SkipWhitespace(ds); + if (itemValue == NULL) + { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + return NULL; + } - switch (*(ds->start++)) - { - case '}': - return ds->dec->endObject(newObj); + if (!ds->dec->objectAddKey (ds->prv, newObj, itemName, itemValue)) + { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + ds->dec->releaseObject(ds->prv, itemName, ds->dec); + ds->dec->releaseObject(ds->prv, itemValue, ds->dec); + return NULL; + } - case ',': - break; + SkipWhitespace(ds); - default: - ds->dec->releaseObject(newObj, ds->dec); - return SetError(ds, -1, "Unexpected character in found when decoding object value"); - } + switch (*(ds->start++)) + { + case '}': + { + ds->objDepth--; + return ds->dec->endObject(ds->prv, newObj); + } + case ',': + break; + + default: + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError(ds, -1, "Unexpected character in found when decoding object value"); } - - ds->dec->releaseObject(newObj, ds->dec); - return SetError(ds, -1, "Unmatched '}' when decoding object"); + } } FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) { - while (1) + for (;;) + { + switch (*ds->start) { - switch (*ds->start) - { - case '\"': - return decode_string (ds); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '-': - return decode_numeric (ds); - - case '[': return decode_array (ds); - case '{': return decode_object (ds); - case 't': return decode_true (ds); - case 'f': return decode_false (ds); - case 'n': return decode_null (ds); - - case ' ': - case '\t': - case '\r': - case '\n': - // White space - ds->start ++; - break; - - default: - return SetError(ds, -1, "Expected object or value"); - } + case '\"': + return decode_string (ds); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + return decode_numeric (ds); + + case '[': return decode_array (ds); + case '{': return decode_object (ds); + case 't': return decode_true (ds); + case 'f': return decode_false (ds); + case 'n': return decode_null (ds); + + case ' ': + case '\t': + case '\r': + case '\n': + // White space + ds->start ++; + break; + + default: + return SetError(ds, -1, "Expected object or value"); } + } } - JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer) { - - /* - FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode escaping doesn't run into the wall each time */ - struct DecoderState ds; - wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))]; - JSOBJ ret; - - ds.start = (char *) buffer; - ds.end = ds.start + cbBuffer; - - ds.escStart = escBuffer; - ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t)); - ds.escHeap = 0; - ds.dec = dec; - ds.dec->errorStr = NULL; - ds.dec->errorOffset = NULL; - - ds.dec = dec; - - ret = decode_any (&ds); - - if (ds.escHeap) - { - dec->free(ds.escStart); - } - return ret; + /* + FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode escaping doesn't run into the wall each time */ + struct DecoderState ds; + wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))]; + JSOBJ ret; + + ds.start = (char *) buffer; + ds.end = ds.start + cbBuffer; + + ds.escStart = escBuffer; + ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t)); + ds.escHeap = 0; + ds.prv = dec->prv; + ds.dec = dec; + ds.dec->errorStr = NULL; + ds.dec->errorOffset = NULL; + ds.objDepth = 0; + + ds.dec = dec; + + ret = decode_any (&ds); + + if (ds.escHeap) + { + dec->free(ds.escStart); + } + + SkipWhitespace(&ds); + + if (ds.start != ds.end && ret) + { + dec->releaseObject(ds.prv, ret, ds.dec); + return SetError(&ds, -1, "Trailing data"); + } + + return ret; } diff --git a/pandas/src/ujson/lib/ultrajsonenc.c b/pandas/src/ujson/lib/ultrajsonenc.c index 22871513870b7..01fc7c10fe755 100644 --- a/pandas/src/ujson/lib/ultrajsonenc.c +++ b/pandas/src/ujson/lib/ultrajsonenc.c @@ -1,37 +1,38 @@ /* -Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. -3. All advertising materials mentioning features or use of this software - must display the following acknowledgement: - This product includes software developed by ESN Social Software AB (www.esn.me). -4. Neither the name of the ESN Social Software AB nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -Portions of code from: -MODP_ASCII - Ascii transformations (upper/lower, etc) + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) http://code.google.com/p/stringencoders/ Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. */ #include "ultrajson.h" @@ -50,42 +51,57 @@ Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights rese #define FALSE 0 #endif +/* +Worst cases being: + +Control characters (ASCII < 32) +0x00 (1 byte) input => \u0000 output (6 bytes) +1 * 6 => 6 (6 bytes required) + +or UTF-16 surrogate pairs +4 bytes input in UTF-8 => \uXXXX\uYYYY (12 bytes). + +4 * 6 => 24 bytes (12 bytes required) + +The extra 2 bytes are for the quotes around the string + +*/ +#define RESERVE_STRING(_len) (2 + ((_len) * 6)) + static const double g_pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000}; static const char g_hexChars[] = "0123456789abcdef"; static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\\\/"; - /* FIXME: While this is fine dandy and working it's a magic value mess which probably only the author understands. Needs a cleanup and more documentation */ /* Table for pure ascii output escaping all characters above 127 to \uXXXX */ -static const JSUINT8 g_asciiOutputTable[256] = +static const JSUINT8 g_asciiOutputTable[256] = { -/* 0x00 */ 0, 30, 30, 30, 30, 30, 30, 30, 10, 12, 14, 30, 16, 18, 30, 30, +/* 0x00 */ 0, 30, 30, 30, 30, 30, 30, 30, 10, 12, 14, 30, 16, 18, 30, 30, /* 0x10 */ 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, -/* 0x20 */ 1, 1, 20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 24, -/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x20 */ 1, 1, 20, 1, 1, 1, 29, 1, 1, 1, 1, 1, 1, 1, 1, 24, +/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 29, 1, 29, 1, +/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 22, 1, 1, 1, -/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 }; - static void SetError (JSOBJ obj, JSONObjectEncoder *enc, const char *message) { - enc->errorMsg = message; - enc->errorObj = obj; + enc->errorMsg = message; + enc->errorObj = obj; } /* @@ -93,332 +109,357 @@ FIXME: Keep track of how big these get across several encoder calls and try to m That way we won't run our head into the wall each call */ void Buffer_Realloc (JSONObjectEncoder *enc, size_t cbNeeded) { - size_t curSize = enc->end - enc->start; - size_t newSize = curSize * 2; - size_t offset = enc->offset - enc->start; - - while (newSize < curSize + cbNeeded) + size_t curSize = enc->end - enc->start; + size_t newSize = curSize * 2; + size_t offset = enc->offset - enc->start; + + while (newSize < curSize + cbNeeded) + { + newSize *= 2; + } + + if (enc->heap) + { + enc->start = (char *) enc->realloc (enc->start, newSize); + if (!enc->start) { - newSize *= 2; + SetError (NULL, enc, "Could not reserve memory block"); + return; } - - if (enc->heap) + } + else + { + char *oldStart = enc->start; + enc->heap = 1; + enc->start = (char *) enc->malloc (newSize); + if (!enc->start) { - enc->start = (char *) enc->realloc (enc->start, newSize); - if (!enc->start) - { - SetError (NULL, enc, "Could not reserve memory block"); - return; - } + SetError (NULL, enc, "Could not reserve memory block"); + return; } - else - { - char *oldStart = enc->start; - enc->heap = 1; - enc->start = (char *) enc->malloc (newSize); - if (!enc->start) - { - SetError (NULL, enc, "Could not reserve memory block"); - return; - } - memcpy (enc->start, oldStart, offset); - } - enc->offset = enc->start + offset; - enc->end = enc->start + newSize; + memcpy (enc->start, oldStart, offset); + } + enc->offset = enc->start + offset; + enc->end = enc->start + newSize; } FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnchecked (char *outputOffset, unsigned short value) { - *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12]; - *(outputOffset++) = g_hexChars[(value & 0x0f00) >> 8]; - *(outputOffset++) = g_hexChars[(value & 0x00f0) >> 4]; - *(outputOffset++) = g_hexChars[(value & 0x000f) >> 0]; + *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12]; + *(outputOffset++) = g_hexChars[(value & 0x0f00) >> 8]; + *(outputOffset++) = g_hexChars[(value & 0x00f0) >> 4]; + *(outputOffset++) = g_hexChars[(value & 0x000f) >> 0]; } -int Buffer_EscapeStringUnvalidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end) +int Buffer_EscapeStringUnvalidated (JSONObjectEncoder *enc, const char *io, const char *end) { - char *of = (char *) enc->offset; + char *of = (char *) enc->offset; - while (1) + for (;;) + { + switch (*io) { - switch (*io) + case 0x00: + { + if (io < end) { - case 0x00: - if (io < end) - { - *(of++) = '\\'; - *(of++) = 'u'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = '0'; - break; - } - else - { - enc->offset += (of - enc->offset); - return TRUE; - } - - case '\"': (*of++) = '\\'; (*of++) = '\"'; break; - case '\\': (*of++) = '\\'; (*of++) = '\\'; break; - case '/': (*of++) = '\\'; (*of++) = '/'; break; - case '\b': (*of++) = '\\'; (*of++) = 'b'; break; - case '\f': (*of++) = '\\'; (*of++) = 'f'; break; - case '\n': (*of++) = '\\'; (*of++) = 'n'; break; - case '\r': (*of++) = '\\'; (*of++) = 'r'; break; - case '\t': (*of++) = '\\'; (*of++) = 't'; break; - - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x0b: - case 0x0e: - case 0x0f: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1a: - case 0x1b: - case 0x1c: - case 0x1d: - case 0x1e: - case 0x1f: - *(of++) = '\\'; - *(of++) = 'u'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = g_hexChars[ (unsigned char) (((*io) & 0xf0) >> 4)]; - *(of++) = g_hexChars[ (unsigned char) ((*io) & 0x0f)]; - break; - - default: (*of++) = (*io); break; + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + break; } - - io++; - } - - return FALSE; + else + { + enc->offset += (of - enc->offset); + return TRUE; + } + } + case '\"': (*of++) = '\\'; (*of++) = '\"'; break; + case '\\': (*of++) = '\\'; (*of++) = '\\'; break; + case '/': (*of++) = '\\'; (*of++) = '/'; break; + case '\b': (*of++) = '\\'; (*of++) = 'b'; break; + case '\f': (*of++) = '\\'; (*of++) = 'f'; break; + case '\n': (*of++) = '\\'; (*of++) = 'n'; break; + case '\r': (*of++) = '\\'; (*of++) = 'r'; break; + case '\t': (*of++) = '\\'; (*of++) = 't'; break; + + case 0x26: // '/' + case 0x3c: // '<' + case 0x3e: // '>' + { + if (enc->encodeHTMLChars) + { + // Fall through to \u00XX case below. + } + else + { + // Same as default case below. + (*of++) = (*io); + break; + } + } + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[ (unsigned char) (((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[ (unsigned char) ((*io) & 0x0f)]; + break; + } + default: (*of++) = (*io); break; + } + io++; + } } - -/* -FIXME: -This code only works with Little and Big Endian - -FIXME: The JSON spec says escape "/" but non of the others do and we don't -want to be left alone doing it so we don't :) - -*/ int Buffer_EscapeStringValidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end) { - JSUTF32 ucs; - char *of = (char *) enc->offset; + JSUTF32 ucs; + char *of = (char *) enc->offset; - while (1) + for (;;) + { + JSUINT8 utflen = g_asciiOutputTable[(unsigned char) *io]; + + switch (utflen) { + case 0: + { + if (io < end) + { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + io ++; + continue; + } + else + { + enc->offset += (of - enc->offset); + return TRUE; + } + } - //JSUINT8 chr = (unsigned char) *io; - JSUINT8 utflen = g_asciiOutputTable[(unsigned char) *io]; + case 1: + { + *(of++)= (*io++); + continue; + } - switch (utflen) + case 2: + { + JSUTF32 in; + JSUTF16 in16; + + if (end - io < 1) { - case 0: - { - if (io < end) - { - *(of++) = '\\'; - *(of++) = 'u'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = '0'; - io ++; - continue; - } - else - { - enc->offset += (of - enc->offset); - return TRUE; - } - } - - case 1: - { - *(of++)= (*io++); - continue; - } - - case 2: - { - JSUTF32 in; - JSUTF16 in16; - - if (end - io < 1) - { - enc->offset += (of - enc->offset); - SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); - return FALSE; - } - - memcpy(&in16, io, sizeof(JSUTF16)); - in = (JSUTF32) in16; + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in16, io, sizeof(JSUTF16)); + in = (JSUTF32) in16; #ifdef __LITTLE_ENDIAN__ - ucs = ((in & 0x1f) << 6) | ((in >> 8) & 0x3f); + ucs = ((in & 0x1f) << 6) | ((in >> 8) & 0x3f); #else - ucs = ((in & 0x1f00) >> 2) | (in & 0x3f); + ucs = ((in & 0x1f00) >> 2) | (in & 0x3f); #endif - if (ucs < 0x80) - { - enc->offset += (of - enc->offset); - SetError (obj, enc, "Overlong 2 byte UTF-8 sequence detected when encoding string"); - return FALSE; - } - - io += 2; - break; - } - - case 3: - { - JSUTF32 in; - JSUTF16 in16; - JSUINT8 in8; - - if (end - io < 2) - { - enc->offset += (of - enc->offset); - SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); - return FALSE; - } - - memcpy(&in16, io, sizeof(JSUTF16)); - memcpy(&in8, io + 2, sizeof(JSUINT8)); + if (ucs < 0x80) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Overlong 2 byte UTF-8 sequence detected when encoding string"); + return FALSE; + } + + io += 2; + break; + } + + case 3: + { + JSUTF32 in; + JSUTF16 in16; + JSUINT8 in8; + + if (end - io < 2) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in16, io, sizeof(JSUTF16)); + memcpy(&in8, io + 2, sizeof(JSUINT8)); #ifdef __LITTLE_ENDIAN__ - in = (JSUTF32) in16; - in |= in8 << 16; - ucs = ((in & 0x0f) << 12) | ((in & 0x3f00) >> 2) | ((in & 0x3f0000) >> 16); + in = (JSUTF32) in16; + in |= in8 << 16; + ucs = ((in & 0x0f) << 12) | ((in & 0x3f00) >> 2) | ((in & 0x3f0000) >> 16); #else - in = in16 << 8; - in |= in8; - ucs = ((in & 0x0f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); + in = in16 << 8; + in |= in8; + ucs = ((in & 0x0f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); #endif + if (ucs < 0x800) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Overlong 3 byte UTF-8 sequence detected when encoding string"); + return FALSE; + } + + io += 3; + break; + } + case 4: + { + JSUTF32 in; - if (ucs < 0x800) - { - enc->offset += (of - enc->offset); - SetError (obj, enc, "Overlong 3 byte UTF-8 sequence detected when encoding string"); - return FALSE; - } - - io += 3; - break; - } - case 4: - { - JSUTF32 in; - - if (end - io < 3) - { - enc->offset += (of - enc->offset); - SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); - return FALSE; - } - - memcpy(&in, io, sizeof(JSUTF32)); + if (end - io < 3) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + memcpy(&in, io, sizeof(JSUTF32)); #ifdef __LITTLE_ENDIAN__ - ucs = ((in & 0x07) << 18) | ((in & 0x3f00) << 4) | ((in & 0x3f0000) >> 10) | ((in & 0x3f000000) >> 24); + ucs = ((in & 0x07) << 18) | ((in & 0x3f00) << 4) | ((in & 0x3f0000) >> 10) | ((in & 0x3f000000) >> 24); #else - ucs = ((in & 0x07000000) >> 6) | ((in & 0x3f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); + ucs = ((in & 0x07000000) >> 6) | ((in & 0x3f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); #endif - if (ucs < 0x10000) - { - enc->offset += (of - enc->offset); - SetError (obj, enc, "Overlong 4 byte UTF-8 sequence detected when encoding string"); - return FALSE; - } - - io += 4; - break; - } - - - case 5: - case 6: - enc->offset += (of - enc->offset); - SetError (obj, enc, "Unsupported UTF-8 sequence length when encoding string"); - return FALSE; - - case 30: - // \uXXXX encode - *(of++) = '\\'; - *(of++) = 'u'; - *(of++) = '0'; - *(of++) = '0'; - *(of++) = g_hexChars[ (unsigned char) (((*io) & 0xf0) >> 4)]; - *(of++) = g_hexChars[ (unsigned char) ((*io) & 0x0f)]; - io ++; - continue; - - case 10: - case 12: - case 14: - case 16: - case 18: - case 20: - case 22: - case 24: - *(of++) = *( (char *) (g_escapeChars + utflen + 0)); - *(of++) = *( (char *) (g_escapeChars + utflen + 1)); - io ++; - continue; + if (ucs < 0x10000) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Overlong 4 byte UTF-8 sequence detected when encoding string"); + return FALSE; } - /* - If the character is a UTF8 sequence of length > 1 we end up here */ - if (ucs >= 0x10000) + io += 4; + break; + } + + + case 5: + case 6: + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unsupported UTF-8 sequence length when encoding string"); + return FALSE; + } + + case 29: + { + if (enc->encodeHTMLChars) { - ucs -= 0x10000; - *(of++) = '\\'; - *(of++) = 'u'; - Buffer_AppendShortHexUnchecked(of, (ucs >> 10) + 0xd800); - of += 4; - - *(of++) = '\\'; - *(of++) = 'u'; - Buffer_AppendShortHexUnchecked(of, (ucs & 0x3ff) + 0xdc00); - of += 4; + // Fall through to \u00XX case 30 below. } else { - *(of++) = '\\'; - *(of++) = 'u'; - Buffer_AppendShortHexUnchecked(of, ucs); - of += 4; + // Same as case 1 above. + *(of++) = (*io++); + continue; } + } + + case 30: + { + // \uXXXX encode + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[ (unsigned char) (((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[ (unsigned char) ((*io) & 0x0f)]; + io ++; + continue; + } + case 10: + case 12: + case 14: + case 16: + case 18: + case 20: + case 22: + case 24: + { + *(of++) = *( (char *) (g_escapeChars + utflen + 0)); + *(of++) = *( (char *) (g_escapeChars + utflen + 1)); + io ++; + continue; + } + // This can never happen, it's here to make L4 VC++ happy + default: + { + ucs = 0; + break; + } } - return FALSE; + /* + If the character is a UTF8 sequence of length > 1 we end up here */ + if (ucs >= 0x10000) + { + ucs -= 0x10000; + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, (unsigned short) (ucs >> 10) + 0xd800); + of += 4; + + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, (unsigned short) (ucs & 0x3ff) + 0xdc00); + of += 4; + } + else + { + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, (unsigned short) ucs); + of += 4; + } + } } #define Buffer_Reserve(__enc, __len) \ - if ((__enc)->end - (__enc)->offset < (__len)) \ + if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len)) \ { \ - Buffer_Realloc((__enc), (__len));\ + Buffer_Realloc((__enc), (__len));\ } \ @@ -427,176 +468,180 @@ int Buffer_EscapeStringValidated (JSOBJ obj, JSONObjectEncoder *enc, const char FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, char* end) { - char aux; - while (end > begin) - aux = *end, *end-- = *begin, *begin++ = aux; + char aux; + while (end > begin) + aux = *end, *end-- = *begin, *begin++ = aux; } void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) { - char* wstr; - JSUINT32 uvalue = (value < 0) ? -value : value; + char* wstr; + JSUINT32 uvalue = (value < 0) ? -value : value; - wstr = enc->offset; - // Conversion. Number is reversed. - - do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10); - if (value < 0) *wstr++ = '-'; + wstr = enc->offset; + // Conversion. Number is reversed. - // Reverse string - strreverse(enc->offset,wstr - 1); - enc->offset += (wstr - (enc->offset)); + do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset,wstr - 1); + enc->offset += (wstr - (enc->offset)); } void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) { - char* wstr; - JSUINT64 uvalue = (value < 0) ? -value : value; + char* wstr; + JSUINT64 uvalue = (value < 0) ? -value : value; - wstr = enc->offset; - // Conversion. Number is reversed. - - do *wstr++ = (char)(48 + (uvalue % 10ULL)); while(uvalue /= 10ULL); - if (value < 0) *wstr++ = '-'; + wstr = enc->offset; + // Conversion. Number is reversed. - // Reverse string - strreverse(enc->offset,wstr - 1); - enc->offset += (wstr - (enc->offset)); + do *wstr++ = (char)(48 + (uvalue % 10ULL)); while(uvalue /= 10ULL); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset,wstr - 1); + enc->offset += (wstr - (enc->offset)); } int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value) { - /* if input is larger than thres_max, revert to exponential */ - const double thres_max = (double) 1e16 - 1; - int count; - double diff = 0.0; - char* str = enc->offset; - char* wstr = str; - unsigned long long whole; - double tmp; - unsigned long long frac; - int neg; - double pow10; - - if (value == HUGE_VAL || value == -HUGE_VAL) - { - SetError (obj, enc, "Invalid Inf value when encoding double"); - return FALSE; - } - if (! (value == value)) - { - SetError (obj, enc, "Invalid Nan value when encoding double"); - return FALSE; - } - + /* if input is larger than thres_max, revert to exponential */ + const double thres_max = (double) 1e16 - 1; + int count; + double diff = 0.0; + char* str = enc->offset; + char* wstr = str; + unsigned long long whole; + double tmp; + unsigned long long frac; + int neg; + double pow10; + + if (value == HUGE_VAL || value == -HUGE_VAL) + { + SetError (obj, enc, "Invalid Inf value when encoding double"); + return FALSE; + } - /* we'll work in positive values and deal with the - negative sign issue later */ - neg = 0; - if (value < 0) + if (!(value == value)) + { + SetError (obj, enc, "Invalid Nan value when encoding double"); + return FALSE; + } + + /* we'll work in positive values and deal with the + negative sign issue later */ + neg = 0; + if (value < 0) + { + neg = 1; + value = -value; + } + + pow10 = g_pow10[enc->doublePrecision]; + + whole = (unsigned long long) value; + tmp = (value - whole) * pow10; + frac = (unsigned long long)(tmp); + diff = tmp - frac; + + if (diff > 0.5) + { + ++frac; + /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */ + if (frac >= pow10) { - neg = 1; - value = -value; + frac = 0; + ++whole; } + } + else + if (diff == 0.5 && ((frac == 0) || (frac & 1))) + { + /* if halfway, round up if odd, OR + if last digit is 0. That last part is strange */ + ++frac; + } + + /* for very large numbers switch back to native sprintf for exponentials. + anyone want to write code to replace this? */ + /* + normal printf behavior is to print EVERY whole number digit + which can be 100s of characters overflowing your buffers == bad + */ + if (value > thres_max) + { +#ifdef _WIN32 + enc->offset += sprintf_s(str, enc->end - enc->offset, "%.15e", neg ? -value : value); +#else + enc->offset += snprintf(str, enc->end - enc->offset, "%.15e", neg ? -value : value); +#endif + return TRUE; + } - pow10 = g_pow10[enc->doublePrecision]; - - whole = (unsigned long long) value; - tmp = (value - whole) * pow10; - frac = (unsigned long long)(tmp); - diff = tmp - frac; + if (enc->doublePrecision == 0) + { + diff = value - whole; - if (diff > 0.5) + if (diff > 0.5) { - ++frac; - /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */ - if (frac >= pow10) - { - frac = 0; - ++whole; - } - } - else - if (diff == 0.5 && ((frac == 0) || (frac & 1))) - { - /* if halfway, round up if odd, OR - if last digit is 0. That last part is strange */ - ++frac; + /* greater than 0.5, round up, e.g. 1.6 -> 2 */ + ++whole; } - - /* for very large numbers switch back to native sprintf for exponentials. - anyone want to write code to replace this? */ - /* - normal printf behavior is to print EVERY whole number digit - which can be 100s of characters overflowing your buffers == bad - */ - if (value > thres_max) + else + if (diff == 0.5 && (whole & 1)) { - enc->offset += sprintf(str, "%.15e", neg ? -value : value); - return TRUE; + /* exactly 0.5 and ODD, then round up */ + /* 1.5 -> 2, but 2.5 -> 2 */ + ++whole; } - if (enc->doublePrecision == 0) + //vvvvvvvvvvvvvvvvvvv Diff from modp_dto2 + } + else + if (frac) { - diff = value - whole; - - if (diff > 0.5) - { - /* greater than 0.5, round up, e.g. 1.6 -> 2 */ - ++whole; - } - else - if (diff == 0.5 && (whole & 1)) - { - /* exactly 0.5 and ODD, then round up */ - /* 1.5 -> 2, but 2.5 -> 2 */ - ++whole; - } - - //vvvvvvvvvvvvvvvvvvv Diff from modp_dto2 - } - else - if (frac) - { - count = enc->doublePrecision; - // now do fractional part, as an unsigned number - // we know it is not 0 but we can have leading zeros, these - // should be removed - while (!(frac % 10)) - { + count = enc->doublePrecision; + // now do fractional part, as an unsigned number + // we know it is not 0 but we can have leading zeros, these + // should be removed + while (!(frac % 10)) + { --count; frac /= 10; - } - //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2 + } + //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2 - // now do fractional part, as an unsigned number - do - { - --count; - *wstr++ = (char)(48 + (frac % 10)); - } while (frac /= 10); - // add extra 0s - while (count-- > 0) - { - *wstr++ = '0'; - } - // add decimal - *wstr++ = '.'; + // now do fractional part, as an unsigned number + do + { + --count; + *wstr++ = (char)(48 + (frac % 10)); + } while (frac /= 10); + // add extra 0s + while (count-- > 0) + { + *wstr++ = '0'; + } + // add decimal + *wstr++ = '.'; } else { - *wstr++ = '0'; - *wstr++ = '.'; + *wstr++ = '0'; + *wstr++ = '.'; } // do whole part // Take care of sign // Conversion. Number is reversed. do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10); - - if (neg) + + if (neg) { - *wstr++ = '-'; + *wstr++ = '-'; } strreverse(str, wstr-1); enc->offset += (wstr - (enc->offset)); @@ -604,11 +649,6 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value return TRUE; } - - - - - /* FIXME: Handle integration functions returning NULL here */ @@ -619,62 +659,57 @@ Perhaps implement recursion detection */ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName) { - const char *value; - char *objName; - int count; - JSOBJ iterObj; - size_t szlen; - JSONTypeContext tc; - tc.encoder = enc; - - if (enc->level > enc->recursionMax) + const char *value; + char *objName; + int count; + JSOBJ iterObj; + size_t szlen; + JSONTypeContext tc; + tc.encoder = enc; + + if (enc->level > enc->recursionMax) + { + SetError (obj, enc, "Maximum recursion level reached"); + return; + } + + /* + This reservation must hold + + length of _name as encoded worst case + + maxLength of double to string OR maxLength of JSLONG to string + */ + + Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName)); + if (enc->errorMsg) + { + return; + } + + if (name) + { + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) { - SetError (obj, enc, "Maximum recursion level reached"); + if (!Buffer_EscapeStringValidated(obj, enc, name, name + cbName)) + { return; + } } - - /* - This reservation must hold - - length of _name as encoded worst case + - maxLength of double to string OR maxLength of JSLONG to string - - Since input is assumed to be UTF-8 the worst character length is: - - 4 bytes (of UTF-8) => "\uXXXX\uXXXX" (12 bytes) - */ - - Buffer_Reserve(enc, 256 + (((cbName / 4) + 1) * 12)); - if (enc->errorMsg) + else { + if (!Buffer_EscapeStringUnvalidated(enc, name, name + cbName)) + { return; + } } - if (name) - { - Buffer_AppendCharUnchecked(enc, '\"'); + Buffer_AppendCharUnchecked(enc, '\"'); - if (enc->forceASCII) - { - if (!Buffer_EscapeStringValidated(obj, enc, name, name + cbName)) - { - return; - } - } - else - { - if (!Buffer_EscapeStringUnvalidated(obj, enc, name, name + cbName)) - { - return; - } - } - - - Buffer_AppendCharUnchecked(enc, '\"'); - - Buffer_AppendCharUnchecked (enc, ':'); + Buffer_AppendCharUnchecked (enc, ':'); #ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked (enc, ' '); + Buffer_AppendCharUnchecked (enc, ' '); #endif } @@ -682,210 +717,209 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName) switch (tc.type) { - case JT_INVALID: - return; + case JT_INVALID: + { + return; + } - case JT_ARRAY: - { - count = 0; - enc->iterBegin(obj, &tc); + case JT_ARRAY: + { + count = 0; + enc->iterBegin(obj, &tc); - Buffer_AppendCharUnchecked (enc, '['); + Buffer_AppendCharUnchecked (enc, '['); - while (enc->iterNext(obj, &tc)) - { - if (count > 0) - { - Buffer_AppendCharUnchecked (enc, ','); + while (enc->iterNext(obj, &tc)) + { + if (count > 0) + { + Buffer_AppendCharUnchecked (enc, ','); #ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked (buffer, ' '); + Buffer_AppendCharUnchecked (buffer, ' '); #endif - } + } - iterObj = enc->iterGetValue(obj, &tc); + iterObj = enc->iterGetValue(obj, &tc); - enc->level ++; - encode (iterObj, enc, NULL, 0); - count ++; - } + enc->level ++; + encode (iterObj, enc, NULL, 0); + count ++; + } - enc->iterEnd(obj, &tc); - Buffer_AppendCharUnchecked (enc, ']'); - break; - } + enc->iterEnd(obj, &tc); + Buffer_AppendCharUnchecked (enc, ']'); + break; + } - case JT_OBJECT: - { - count = 0; - enc->iterBegin(obj, &tc); + case JT_OBJECT: + { + count = 0; + enc->iterBegin(obj, &tc); - Buffer_AppendCharUnchecked (enc, '{'); + Buffer_AppendCharUnchecked (enc, '{'); - while (enc->iterNext(obj, &tc)) - { - if (count > 0) - { - Buffer_AppendCharUnchecked (enc, ','); + while (enc->iterNext(obj, &tc)) + { + if (count > 0) + { + Buffer_AppendCharUnchecked (enc, ','); #ifndef JSON_NO_EXTRA_WHITESPACE - Buffer_AppendCharUnchecked (enc, ' '); + Buffer_AppendCharUnchecked (enc, ' '); #endif - } + } - iterObj = enc->iterGetValue(obj, &tc); - objName = enc->iterGetName(obj, &tc, &szlen); - - enc->level ++; - encode (iterObj, enc, objName, szlen); - count ++; - } - - enc->iterEnd(obj, &tc); - Buffer_AppendCharUnchecked (enc, '}'); - break; - } - - case JT_LONG: - { - Buffer_AppendLongUnchecked (enc, enc->getLongValue(obj, &tc)); - break; - } + iterObj = enc->iterGetValue(obj, &tc); + objName = enc->iterGetName(obj, &tc, &szlen); - case JT_INT: - { - Buffer_AppendIntUnchecked (enc, enc->getIntValue(obj, &tc)); - break; - } - - case JT_TRUE: - { - Buffer_AppendCharUnchecked (enc, 't'); - Buffer_AppendCharUnchecked (enc, 'r'); - Buffer_AppendCharUnchecked (enc, 'u'); - Buffer_AppendCharUnchecked (enc, 'e'); - break; - } - - case JT_FALSE: - { - Buffer_AppendCharUnchecked (enc, 'f'); - Buffer_AppendCharUnchecked (enc, 'a'); - Buffer_AppendCharUnchecked (enc, 'l'); - Buffer_AppendCharUnchecked (enc, 's'); - Buffer_AppendCharUnchecked (enc, 'e'); - break; - } + enc->level ++; + encode (iterObj, enc, objName, szlen); + count ++; + } + enc->iterEnd(obj, &tc); + Buffer_AppendCharUnchecked (enc, '}'); + break; + } + + case JT_LONG: + { + Buffer_AppendLongUnchecked (enc, enc->getLongValue(obj, &tc)); + break; + } + + case JT_INT: + { + Buffer_AppendIntUnchecked (enc, enc->getIntValue(obj, &tc)); + break; + } + + case JT_TRUE: + { + Buffer_AppendCharUnchecked (enc, 't'); + Buffer_AppendCharUnchecked (enc, 'r'); + Buffer_AppendCharUnchecked (enc, 'u'); + Buffer_AppendCharUnchecked (enc, 'e'); + break; + } + + case JT_FALSE: + { + Buffer_AppendCharUnchecked (enc, 'f'); + Buffer_AppendCharUnchecked (enc, 'a'); + Buffer_AppendCharUnchecked (enc, 'l'); + Buffer_AppendCharUnchecked (enc, 's'); + Buffer_AppendCharUnchecked (enc, 'e'); + break; + } + + + case JT_NULL: + { + Buffer_AppendCharUnchecked (enc, 'n'); + Buffer_AppendCharUnchecked (enc, 'u'); + Buffer_AppendCharUnchecked (enc, 'l'); + Buffer_AppendCharUnchecked (enc, 'l'); + break; + } + + case JT_DOUBLE: + { + if (!Buffer_AppendDoubleUnchecked (obj, enc, enc->getDoubleValue(obj, &tc))) + { + enc->endTypeContext(obj, &tc); + enc->level --; + return; + } + break; + } + + case JT_UTF8: + { + value = enc->getStringValue(obj, &tc, &szlen); + Buffer_Reserve(enc, RESERVE_STRING(szlen)); + if (enc->errorMsg) + { + enc->endTypeContext(obj, &tc); + return; + } + Buffer_AppendCharUnchecked (enc, '\"'); - case JT_NULL: + if (enc->forceASCII) + { + if (!Buffer_EscapeStringValidated(obj, enc, value, value + szlen)) { - Buffer_AppendCharUnchecked (enc, 'n'); - Buffer_AppendCharUnchecked (enc, 'u'); - Buffer_AppendCharUnchecked (enc, 'l'); - Buffer_AppendCharUnchecked (enc, 'l'); - break; + enc->endTypeContext(obj, &tc); + enc->level --; + return; } - - case JT_DOUBLE: + } + else + { + if (!Buffer_EscapeStringUnvalidated(enc, value, value + szlen)) { - if (!Buffer_AppendDoubleUnchecked (obj, enc, enc->getDoubleValue(obj, &tc))) - { - enc->endTypeContext(obj, &tc); - enc->level --; - return; - } - break; + enc->endTypeContext(obj, &tc); + enc->level --; + return; } + } - case JT_UTF8: - { - value = enc->getStringValue(obj, &tc, &szlen); - Buffer_Reserve(enc, ((szlen / 4) + 1) * 12); - if (enc->errorMsg) - { - enc->endTypeContext(obj, &tc); - return; - } - Buffer_AppendCharUnchecked (enc, '\"'); - - - if (enc->forceASCII) - { - if (!Buffer_EscapeStringValidated(obj, enc, value, value + szlen)) - { - enc->endTypeContext(obj, &tc); - enc->level --; - return; - } - } - else - { - if (!Buffer_EscapeStringUnvalidated(obj, enc, value, value + szlen)) - { - enc->endTypeContext(obj, &tc); - enc->level --; - return; - } - } - - Buffer_AppendCharUnchecked (enc, '\"'); - break; - } + Buffer_AppendCharUnchecked (enc, '\"'); + break; } + } - enc->endTypeContext(obj, &tc); - enc->level --; - + enc->endTypeContext(obj, &tc); + enc->level --; } char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer) { - enc->malloc = enc->malloc ? enc->malloc : malloc; - enc->free = enc->free ? enc->free : free; - enc->realloc = enc->realloc ? enc->realloc : realloc; - enc->errorMsg = NULL; - enc->errorObj = NULL; - enc->level = 0; - - if (enc->recursionMax < 1) - { - enc->recursionMax = JSON_MAX_RECURSION_DEPTH; - } - - if (enc->doublePrecision < 0 || - enc->doublePrecision > JSON_DOUBLE_MAX_DECIMALS) - { - enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS; - } - - if (_buffer == NULL) + enc->malloc = enc->malloc ? enc->malloc : malloc; + enc->free = enc->free ? enc->free : free; + enc->realloc = enc->realloc ? enc->realloc : realloc; + enc->errorMsg = NULL; + enc->errorObj = NULL; + enc->level = 0; + + if (enc->recursionMax < 1) + { + enc->recursionMax = JSON_MAX_RECURSION_DEPTH; + } + + if (enc->doublePrecision < 0 || + enc->doublePrecision > JSON_DOUBLE_MAX_DECIMALS) + { + enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS; + } + + if (_buffer == NULL) + { + _cbBuffer = 32768; + enc->start = (char *) enc->malloc (_cbBuffer); + if (!enc->start) { - _cbBuffer = 32768; - enc->start = (char *) enc->malloc (_cbBuffer); - if (!enc->start) - { - SetError(obj, enc, "Could not reserve memory block"); - return NULL; - } - enc->heap = 1; - } - else - { - enc->start = _buffer; - enc->heap = 0; + SetError(obj, enc, "Could not reserve memory block"); + return NULL; } - - enc->end = enc->start + _cbBuffer; - enc->offset = enc->start; - - - encode (obj, enc, NULL, 0); - - Buffer_Reserve(enc, 1); - if (enc->errorMsg) - { - return NULL; - } - Buffer_AppendCharUnchecked(enc, '\0'); - - return enc->start; + enc->heap = 1; + } + else + { + enc->start = _buffer; + enc->heap = 0; + } + + enc->end = enc->start + _cbBuffer; + enc->offset = enc->start; + + encode (obj, enc, NULL, 0); + + Buffer_Reserve(enc, 1); + if (enc->errorMsg) + { + return NULL; + } + Buffer_AppendCharUnchecked(enc, '\0'); + + return enc->start; } diff --git a/pandas/src/ujson/python/JSONtoObj.c b/pandas/src/ujson/python/JSONtoObj.c index bc42269d9698b..9c1b4febd9895 100644 --- a/pandas/src/ujson/python/JSONtoObj.c +++ b/pandas/src/ujson/python/JSONtoObj.c @@ -1,3 +1,40 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + #include "py_defines.h" #define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY #define NO_IMPORT_ARRAY @@ -5,33 +42,33 @@ #include +//#define PRINTMARK() fprintf(stderr, "%s: MARK(%d)\n", __FILE__, __LINE__) +#define PRINTMARK() + typedef struct __PyObjectDecoder { - JSONObjectDecoder dec; + JSONObjectDecoder dec; - void* npyarr; // Numpy context buffer - void* npyarr_addr; // Ref to npyarr ptr to track DECREF calls - npy_intp curdim; // Current array dimension + void* npyarr; // Numpy context buffer + void* npyarr_addr; // Ref to npyarr ptr to track DECREF calls + npy_intp curdim; // Current array dimension - PyArray_Descr* dtype; + PyArray_Descr* dtype; } PyObjectDecoder; typedef struct __NpyArrContext { - PyObject* ret; - PyObject* labels[2]; - PyArray_Dims shape; + PyObject* ret; + PyObject* labels[2]; + PyArray_Dims shape; - PyObjectDecoder* dec; + PyObjectDecoder* dec; - npy_intp i; - npy_intp elsize; - npy_intp elcount; + npy_intp i; + npy_intp elsize; + npy_intp elcount; } NpyArrContext; -//#define PRINTMARK() fprintf(stderr, "%s: MARK(%d)\n", __FILE__, __LINE__) -#define PRINTMARK() - // Numpy handling based on numpy internal code, specifically the function // PyArray_FromIter. @@ -39,638 +76,661 @@ typedef struct __NpyArrContext // to ensure the compiler catches any errors // standard numpy array handling -JSOBJ Object_npyNewArray(void* decoder); -JSOBJ Object_npyEndArray(JSOBJ obj); -int Object_npyArrayAddItem(JSOBJ obj, JSOBJ value); +JSOBJ Object_npyNewArray(void *prv, void* decoder); +JSOBJ Object_npyEndArray(void *prv, JSOBJ obj); +int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value); // for more complex dtypes (object and string) fill a standard Python list // and convert to a numpy array when done. -JSOBJ Object_npyNewArrayList(void* decoder); -JSOBJ Object_npyEndArrayList(JSOBJ obj); -int Object_npyArrayListAddItem(JSOBJ obj, JSOBJ value); +JSOBJ Object_npyNewArrayList(void *prv, void* decoder); +JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj); +int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value); // labelled support, encode keys and values of JS object into separate numpy // arrays -JSOBJ Object_npyNewObject(void* decoder); -JSOBJ Object_npyEndObject(JSOBJ obj); -int Object_npyObjectAddKey(JSOBJ obj, JSOBJ name, JSOBJ value); - +JSOBJ Object_npyNewObject(void *prv, void* decoder); +JSOBJ Object_npyEndObject(void *prv, JSOBJ obj); +int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value); // free the numpy context buffer void Npy_releaseContext(NpyArrContext* npyarr) { - PRINTMARK(); - if (npyarr) + PRINTMARK(); + if (npyarr) + { + if (npyarr->shape.ptr) { - if (npyarr->shape.ptr) - { - PyObject_Free(npyarr->shape.ptr); - } - if (npyarr->dec) - { - npyarr->dec->npyarr = NULL; - npyarr->dec->curdim = 0; - } - Py_XDECREF(npyarr->labels[0]); - Py_XDECREF(npyarr->labels[1]); - Py_XDECREF(npyarr->ret); - PyObject_Free(npyarr); + PyObject_Free(npyarr->shape.ptr); } + if (npyarr->dec) + { + npyarr->dec->npyarr = NULL; + npyarr->dec->curdim = 0; + } + Py_XDECREF(npyarr->labels[0]); + Py_XDECREF(npyarr->labels[1]); + Py_XDECREF(npyarr->ret); + PyObject_Free(npyarr); + } } -JSOBJ Object_npyNewArray(void* _decoder) +JSOBJ Object_npyNewArray(void *prv, void* _decoder) { - NpyArrContext* npyarr; - PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; - PRINTMARK(); - if (decoder->curdim <= 0) + NpyArrContext* npyarr; + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + PRINTMARK(); + if (decoder->curdim <= 0) + { + // start of array - initialise the context buffer + npyarr = decoder->npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + decoder->npyarr_addr = npyarr; + + if (!npyarr) { - // start of array - initialise the context buffer - npyarr = decoder->npyarr = PyObject_Malloc(sizeof(NpyArrContext)); - decoder->npyarr_addr = npyarr; - - if (!npyarr) - { - PyErr_NoMemory(); - return NULL; - } - - npyarr->dec = decoder; - npyarr->labels[0] = npyarr->labels[1] = NULL; - - npyarr->shape.ptr = PyObject_Malloc(sizeof(npy_intp)*NPY_MAXDIMS); - npyarr->shape.len = 1; - npyarr->ret = NULL; - - npyarr->elsize = 0; - npyarr->elcount = 4; - npyarr->i = 0; - } - else + PyErr_NoMemory(); + return NULL; + } + + npyarr->dec = decoder; + npyarr->labels[0] = npyarr->labels[1] = NULL; + + npyarr->shape.ptr = PyObject_Malloc(sizeof(npy_intp)*NPY_MAXDIMS); + npyarr->shape.len = 1; + npyarr->ret = NULL; + + npyarr->elsize = 0; + npyarr->elcount = 4; + npyarr->i = 0; + } + else + { + // starting a new dimension continue the current array (and reshape after) + npyarr = (NpyArrContext*) decoder->npyarr; + if (decoder->curdim >= npyarr->shape.len) { - // starting a new dimension continue the current array (and reshape after) - npyarr = (NpyArrContext*) decoder->npyarr; - if (decoder->curdim >= npyarr->shape.len) - { - npyarr->shape.len++; - } + npyarr->shape.len++; } + } - npyarr->shape.ptr[decoder->curdim] = 0; - decoder->curdim++; - return npyarr; + npyarr->shape.ptr[decoder->curdim] = 0; + decoder->curdim++; + return npyarr; } PyObject* Npy_returnLabelled(NpyArrContext* npyarr) { - PyObject* ret = npyarr->ret; - npy_intp i; - - if (npyarr->labels[0] || npyarr->labels[1]) + PyObject* ret = npyarr->ret; + npy_intp i; + + if (npyarr->labels[0] || npyarr->labels[1]) + { + // finished decoding, build tuple with values and labels + ret = PyTuple_New(npyarr->shape.len+1); + for (i = 0; i < npyarr->shape.len; i++) { - // finished decoding, build tuple with values and labels - ret = PyTuple_New(npyarr->shape.len+1); - for (i = 0; i < npyarr->shape.len; i++) - { - if (npyarr->labels[i]) - { - PyTuple_SET_ITEM(ret, i+1, npyarr->labels[i]); - npyarr->labels[i] = NULL; - } - else - { - Py_INCREF(Py_None); - PyTuple_SET_ITEM(ret, i+1, Py_None); - } - } - PyTuple_SET_ITEM(ret, 0, npyarr->ret); - } - - return ret; + if (npyarr->labels[i]) + { + PyTuple_SET_ITEM(ret, i+1, npyarr->labels[i]); + npyarr->labels[i] = NULL; + } + else + { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(ret, i+1, Py_None); + } + } + PyTuple_SET_ITEM(ret, 0, npyarr->ret); + } + + return ret; } -JSOBJ Object_npyEndArray(JSOBJ obj) +JSOBJ Object_npyEndArray(void *prv, JSOBJ obj) { - PyObject *ret; - char* new_data; - NpyArrContext* npyarr = (NpyArrContext*) obj; - int emptyType = NPY_DEFAULT_TYPE; - npy_intp i; - PRINTMARK(); - if (!npyarr) - { - return NULL; - } + PyObject *ret; + char* new_data; + NpyArrContext* npyarr = (NpyArrContext*) obj; + int emptyType = NPY_DEFAULT_TYPE; + npy_intp i; + PRINTMARK(); + if (!npyarr) + { + return NULL; + } - ret = npyarr->ret; - i = npyarr->i; + ret = npyarr->ret; + i = npyarr->i; - npyarr->dec->curdim--; + npyarr->dec->curdim--; - if (i == 0 || !npyarr->ret) { - // empty array would not have been initialised so do it now. - if (npyarr->dec->dtype) - { - emptyType = npyarr->dec->dtype->type_num; - } - npyarr->ret = ret = PyArray_EMPTY(npyarr->shape.len, npyarr->shape.ptr, emptyType, 0); - } - else if (npyarr->dec->curdim <= 0) + if (i == 0 || !npyarr->ret) { + // empty array would not have been initialised so do it now. + if (npyarr->dec->dtype) { - // realloc to final size - new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * npyarr->elsize); - if (new_data == NULL) { - PyErr_NoMemory(); - Npy_releaseContext(npyarr); - return NULL; - } - ((PyArrayObject*) ret)->data = (void*) new_data; - // PyArray_BYTES(ret) = new_data; - } - - if (npyarr->dec->curdim <= 0) + emptyType = npyarr->dec->dtype->type_num; + } + npyarr->ret = ret = PyArray_EMPTY(npyarr->shape.len, npyarr->shape.ptr, emptyType, 0); + } + else if (npyarr->dec->curdim <= 0) + { + // realloc to final size + new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * npyarr->elsize); + if (new_data == NULL) { + PyErr_NoMemory(); + Npy_releaseContext(npyarr); + return NULL; + } + ((PyArrayObject*) ret)->data = (void*) new_data; + // PyArray_BYTES(ret) = new_data; + } + + if (npyarr->dec->curdim <= 0) + { + // finished decoding array, reshape if necessary + if (npyarr->shape.len > 1) { - // finished decoding array, reshape if necessary - if (npyarr->shape.len > 1) - { - npyarr->ret = PyArray_Newshape((PyArrayObject*) ret, &npyarr->shape, NPY_ANYORDER); - Py_DECREF(ret); - } + npyarr->ret = PyArray_Newshape((PyArrayObject*) ret, &npyarr->shape, NPY_ANYORDER); + Py_DECREF(ret); + } - ret = Npy_returnLabelled(npyarr); + ret = Npy_returnLabelled(npyarr); - npyarr->ret = NULL; - Npy_releaseContext(npyarr); - } + npyarr->ret = NULL; + Npy_releaseContext(npyarr); + } - return ret; + return ret; } -int Object_npyArrayAddItem(JSOBJ obj, JSOBJ value) +int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { - PyObject* type; - PyArray_Descr* dtype; - npy_intp i; - char *new_data, *item; - NpyArrContext* npyarr = (NpyArrContext*) obj; - PRINTMARK(); - if (!npyarr) + PyObject* type; + PyArray_Descr* dtype; + npy_intp i; + char *new_data, *item; + NpyArrContext* npyarr = (NpyArrContext*) obj; + PRINTMARK(); + if (!npyarr) + { + return 0; + } + + i = npyarr->i; + + npyarr->shape.ptr[npyarr->dec->curdim-1]++; + + if (PyArray_Check((PyObject*)value)) + { + // multidimensional array, keep decoding values. + return 1; + } + + if (!npyarr->ret) + { + // Array not initialised yet. + // We do it here so we can 'sniff' the data type if none was provided + if (!npyarr->dec->dtype) + { + type = PyObject_Type(value); + if(!PyArray_DescrConverter(type, &dtype)) + { + Py_DECREF(type); + goto fail; + } + Py_INCREF(dtype); + Py_DECREF(type); + } + else { - return 0; + dtype = PyArray_DescrNew(npyarr->dec->dtype); } - i = npyarr->i; + // If it's an object or string then fill a Python list and subsequently + // convert. Otherwise we would need to somehow mess about with + // reference counts when renewing memory. + npyarr->elsize = dtype->elsize; + if (PyDataType_REFCHK(dtype) || npyarr->elsize == 0) + { + Py_XDECREF(dtype); - npyarr->shape.ptr[npyarr->dec->curdim-1]++; + if (npyarr->dec->curdim > 1) + { + PyErr_SetString(PyExc_ValueError, "Cannot decode multidimensional arrays with variable length elements to numpy"); + goto fail; + } + npyarr->elcount = 0; + npyarr->ret = PyList_New(0); + if (!npyarr->ret) + { + goto fail; + } + ((JSONObjectDecoder*)npyarr->dec)->newArray = Object_npyNewArrayList; + ((JSONObjectDecoder*)npyarr->dec)->arrayAddItem = Object_npyArrayListAddItem; + ((JSONObjectDecoder*)npyarr->dec)->endArray = Object_npyEndArrayList; + return Object_npyArrayListAddItem(prv, obj, value); + } + + npyarr->ret = PyArray_NewFromDescr(&PyArray_Type, dtype, 1, + &npyarr->elcount, NULL,NULL, 0, NULL); - if (PyArray_Check((PyObject*)value)) + if (!npyarr->ret) { - // multidimensional array, keep decoding values. - return 1; + goto fail; } + } - if (!npyarr->ret) + if (i >= npyarr->elcount) { + // Grow PyArray_DATA(ret): + // this is similar for the strategy for PyListObject, but we use + // 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... + if (npyarr->elsize == 0) { - // Array not initialised yet. - // We do it here so we can 'sniff' the data type if none was provided - if (!npyarr->dec->dtype) - { - type = PyObject_Type(value); - if(!PyArray_DescrConverter(type, &dtype)) - { - Py_DECREF(type); - goto fail; - } - Py_INCREF(dtype); - Py_DECREF(type); - } - else - { - dtype = PyArray_DescrNew(npyarr->dec->dtype); - } - - // If it's an object or string then fill a Python list and subsequently - // convert. Otherwise we would need to somehow mess about with - // reference counts when renewing memory. - npyarr->elsize = dtype->elsize; - if (PyDataType_REFCHK(dtype) || npyarr->elsize == 0) - { - Py_XDECREF(dtype); - - if (npyarr->dec->curdim > 1) - { - PyErr_SetString(PyExc_ValueError, "Cannot decode multidimensional arrays with variable length elements to numpy"); - goto fail; - } - npyarr->elcount = 0; - npyarr->ret = PyList_New(0); - if (!npyarr->ret) - { - goto fail; - } - ((JSONObjectDecoder*)npyarr->dec)->newArray = Object_npyNewArrayList; - ((JSONObjectDecoder*)npyarr->dec)->arrayAddItem = Object_npyArrayListAddItem; - ((JSONObjectDecoder*)npyarr->dec)->endArray = Object_npyEndArrayList; - return Object_npyArrayListAddItem(obj, value); - } - - npyarr->ret = PyArray_NewFromDescr(&PyArray_Type, dtype, 1, - &npyarr->elcount, NULL,NULL, 0, NULL); - - if (!npyarr->ret) - { - goto fail; - } + PyErr_SetString(PyExc_ValueError, "Cannot decode multidimensional arrays with variable length elements to numpy"); + goto fail; } - if (i >= npyarr->elcount) { - // Grow PyArray_DATA(ret): - // this is similar for the strategy for PyListObject, but we use - // 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... - if (npyarr->elsize == 0) - { - PyErr_SetString(PyExc_ValueError, "Cannot decode multidimensional arrays with variable length elements to numpy"); - goto fail; - } - - npyarr->elcount = (i >> 1) + (i < 4 ? 4 : 2) + i; - if (npyarr->elcount <= NPY_MAX_INTP/npyarr->elsize) { - new_data = PyDataMem_RENEW(PyArray_DATA(npyarr->ret), npyarr->elcount * npyarr->elsize); - } - else { - PyErr_NoMemory(); - goto fail; - } - ((PyArrayObject*) npyarr->ret)->data = (void*) new_data; - - // PyArray_BYTES(npyarr->ret) = new_data; + npyarr->elcount = (i >> 1) + (i < 4 ? 4 : 2) + i; + if (npyarr->elcount <= NPY_MAX_INTP/npyarr->elsize) { + new_data = PyDataMem_RENEW(PyArray_DATA(npyarr->ret), npyarr->elcount * npyarr->elsize); + } + else { + PyErr_NoMemory(); + goto fail; } + ((PyArrayObject*) npyarr->ret)->data = (void*) new_data; - PyArray_DIMS(npyarr->ret)[0] = i + 1; + // PyArray_BYTES(npyarr->ret) = new_data; + } - if ((item = PyArray_GETPTR1(npyarr->ret, i)) == NULL - || PyArray_SETITEM(npyarr->ret, item, value) == -1) { - goto fail; - } + PyArray_DIMS(npyarr->ret)[0] = i + 1; - Py_DECREF( (PyObject *) value); - npyarr->i++; - return 1; + if ((item = PyArray_GETPTR1(npyarr->ret, i)) == NULL + || PyArray_SETITEM(npyarr->ret, item, value) == -1) { + goto fail; + } + + Py_DECREF( (PyObject *) value); + npyarr->i++; + return 1; fail: - Npy_releaseContext(npyarr); - return 0; + Npy_releaseContext(npyarr); + return 0; } -JSOBJ Object_npyNewArrayList(void* _decoder) +JSOBJ Object_npyNewArrayList(void *prv, void* _decoder) { - PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; - PRINTMARK(); - PyErr_SetString(PyExc_ValueError, "nesting not supported for object or variable length dtypes"); - Npy_releaseContext(decoder->npyarr); - return NULL; + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + PRINTMARK(); + PyErr_SetString(PyExc_ValueError, "nesting not supported for object or variable length dtypes"); + Npy_releaseContext(decoder->npyarr); + return NULL; } -JSOBJ Object_npyEndArrayList(JSOBJ obj) +JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj) { - PyObject *list, *ret; - NpyArrContext* npyarr = (NpyArrContext*) obj; - PRINTMARK(); - if (!npyarr) - { - return NULL; - } + PyObject *list, *ret; + NpyArrContext* npyarr = (NpyArrContext*) obj; + PRINTMARK(); + if (!npyarr) + { + return NULL; + } - // convert decoded list to numpy array - list = (PyObject *) npyarr->ret; - npyarr->ret = PyArray_FROM_O(list); + // convert decoded list to numpy array + list = (PyObject *) npyarr->ret; + npyarr->ret = PyArray_FROM_O(list); - ret = Npy_returnLabelled(npyarr); - npyarr->ret = list; + ret = Npy_returnLabelled(npyarr); + npyarr->ret = list; - ((JSONObjectDecoder*)npyarr->dec)->newArray = Object_npyNewArray; - ((JSONObjectDecoder*)npyarr->dec)->arrayAddItem = Object_npyArrayAddItem; - ((JSONObjectDecoder*)npyarr->dec)->endArray = Object_npyEndArray; - Npy_releaseContext(npyarr); - return ret; + ((JSONObjectDecoder*)npyarr->dec)->newArray = Object_npyNewArray; + ((JSONObjectDecoder*)npyarr->dec)->arrayAddItem = Object_npyArrayAddItem; + ((JSONObjectDecoder*)npyarr->dec)->endArray = Object_npyEndArray; + Npy_releaseContext(npyarr); + return ret; } -int Object_npyArrayListAddItem(JSOBJ obj, JSOBJ value) +int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value) { - NpyArrContext* npyarr = (NpyArrContext*) obj; - PRINTMARK(); - if (!npyarr) - { - return 0; - } - PyList_Append((PyObject*) npyarr->ret, value); - Py_DECREF( (PyObject *) value); - npyarr->elcount++; - return 1; + NpyArrContext* npyarr = (NpyArrContext*) obj; + PRINTMARK(); + if (!npyarr) + { + return 0; + } + PyList_Append((PyObject*) npyarr->ret, value); + Py_DECREF( (PyObject *) value); + npyarr->elcount++; + return 1; } -JSOBJ Object_npyNewObject(void* _decoder) +JSOBJ Object_npyNewObject(void *prv, void* _decoder) { - PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; - PRINTMARK(); - if (decoder->curdim > 1) - { - PyErr_SetString(PyExc_ValueError, "labels only supported up to 2 dimensions"); - return NULL; - } + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + PRINTMARK(); + if (decoder->curdim > 1) + { + PyErr_SetString(PyExc_ValueError, "labels only supported up to 2 dimensions"); + return NULL; + } - return ((JSONObjectDecoder*)decoder)->newArray(decoder); + return ((JSONObjectDecoder*)decoder)->newArray(prv, decoder); } -JSOBJ Object_npyEndObject(JSOBJ obj) +JSOBJ Object_npyEndObject(void *prv, JSOBJ obj) { - PyObject *list; - npy_intp labelidx; - NpyArrContext* npyarr = (NpyArrContext*) obj; - PRINTMARK(); - if (!npyarr) - { - return NULL; - } + PyObject *list; + npy_intp labelidx; + NpyArrContext* npyarr = (NpyArrContext*) obj; + PRINTMARK(); + if (!npyarr) + { + return NULL; + } - labelidx = npyarr->dec->curdim-1; + labelidx = npyarr->dec->curdim-1; - list = npyarr->labels[labelidx]; - if (list) - { - npyarr->labels[labelidx] = PyArray_FROM_O(list); - Py_DECREF(list); - } + list = npyarr->labels[labelidx]; + if (list) + { + npyarr->labels[labelidx] = PyArray_FROM_O(list); + Py_DECREF(list); + } - return (PyObject*) ((JSONObjectDecoder*)npyarr->dec)->endArray(obj); + return (PyObject*) ((JSONObjectDecoder*)npyarr->dec)->endArray(prv, obj); } -int Object_npyObjectAddKey(JSOBJ obj, JSOBJ name, JSOBJ value) +int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { - PyObject *label; - npy_intp labelidx; - // add key to label array, value to values array - NpyArrContext* npyarr = (NpyArrContext*) obj; - PRINTMARK(); - if (!npyarr) - { - return 0; - } - - label = (PyObject*) name; - labelidx = npyarr->dec->curdim-1; - - if (!npyarr->labels[labelidx]) - { - npyarr->labels[labelidx] = PyList_New(0); - } - - // only fill label array once, assumes all column labels are the same - // for 2-dimensional arrays. - if (PyList_GET_SIZE(npyarr->labels[labelidx]) <= npyarr->elcount) - { - PyList_Append(npyarr->labels[labelidx], label); - } - - if(((JSONObjectDecoder*)npyarr->dec)->arrayAddItem(obj, value)) - { - Py_DECREF(label); - return 1; - } + PyObject *label; + npy_intp labelidx; + // add key to label array, value to values array + NpyArrContext* npyarr = (NpyArrContext*) obj; + PRINTMARK(); + if (!npyarr) + { return 0; + } + + label = (PyObject*) name; + labelidx = npyarr->dec->curdim-1; + + if (!npyarr->labels[labelidx]) + { + npyarr->labels[labelidx] = PyList_New(0); + } + + // only fill label array once, assumes all column labels are the same + // for 2-dimensional arrays. + if (PyList_GET_SIZE(npyarr->labels[labelidx]) <= npyarr->elcount) + { + PyList_Append(npyarr->labels[labelidx], label); + } + + if(((JSONObjectDecoder*)npyarr->dec)->arrayAddItem(prv, obj, value)) + { + Py_DECREF(label); + return 1; + } + return 0; } -int Object_objectAddKey(JSOBJ obj, JSOBJ name, JSOBJ value) +int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { - PyDict_SetItem (obj, name, value); - Py_DECREF( (PyObject *) name); - Py_DECREF( (PyObject *) value); - return 1; + PyDict_SetItem (obj, name, value); + Py_DECREF( (PyObject *) name); + Py_DECREF( (PyObject *) value); + return 1; } -int Object_arrayAddItem(JSOBJ obj, JSOBJ value) +int Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { - PyList_Append(obj, value); - Py_DECREF( (PyObject *) value); - return 1; + PyList_Append(obj, value); + Py_DECREF( (PyObject *) value); + return 1; } -JSOBJ Object_newString(wchar_t *start, wchar_t *end) +JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end) { - return PyUnicode_FromWideChar (start, (end - start)); + return PyUnicode_FromWideChar (start, (end - start)); } -JSOBJ Object_newTrue(void) +JSOBJ Object_newTrue(void *prv) { - Py_RETURN_TRUE; + Py_RETURN_TRUE; } -JSOBJ Object_newFalse(void) +JSOBJ Object_newFalse(void *prv) { - Py_RETURN_FALSE; + Py_RETURN_FALSE; } -JSOBJ Object_newNull(void) +JSOBJ Object_newNull(void *prv) { - Py_RETURN_NONE; + Py_RETURN_NONE; } -JSOBJ Object_newObject(void* decoder) +JSOBJ Object_newObject(void *prv, void* decoder) { - return PyDict_New(); + return PyDict_New(); } -JSOBJ Object_endObject(JSOBJ obj) +JSOBJ Object_endObject(void *prv, JSOBJ obj) { - return obj; + return obj; } -JSOBJ Object_newArray(void* decoder) +JSOBJ Object_newArray(void *prv, void* decoder) { - return PyList_New(0); + return PyList_New(0); } -JSOBJ Object_endArray(JSOBJ obj) +JSOBJ Object_endArray(void *prv, JSOBJ obj) { - return obj; + return obj; } -JSOBJ Object_newInteger(JSINT32 value) +JSOBJ Object_newInteger(void *prv, JSINT32 value) { - return PyInt_FromLong( (long) value); + return PyInt_FromLong( (long) value); } -JSOBJ Object_newLong(JSINT64 value) +JSOBJ Object_newLong(void *prv, JSINT64 value) { - return PyLong_FromLongLong (value); + return PyLong_FromLongLong (value); } -JSOBJ Object_newDouble(double value) +JSOBJ Object_newDouble(void *prv, double value) { - return PyFloat_FromDouble(value); + return PyFloat_FromDouble(value); } -static void Object_releaseObject(JSOBJ obj, void* _decoder) +static void Object_releaseObject(void *prv, JSOBJ obj, void* _decoder) { - PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; - if (obj != decoder->npyarr_addr) - { - Py_XDECREF( ((PyObject *)obj)); - } + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + if (obj != decoder->npyarr_addr) + { + Py_XDECREF( ((PyObject *)obj)); + } } +static char *g_kwlist[] = {"obj", "precise_float", "numpy", "labelled", "dtype", NULL}; PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs) { - PyObject *ret; - PyObject *sarg; - JSONObjectDecoder *decoder; - PyObjectDecoder pyDecoder; - PyArray_Descr *dtype = NULL; - static char *kwlist[] = { "obj", "numpy", "labelled", "dtype", NULL}; - int numpy = 0, labelled = 0, decref = 0; - // PRINTMARK(); - - JSONObjectDecoder dec = { - Object_newString, - Object_objectAddKey, - Object_arrayAddItem, - Object_newTrue, - Object_newFalse, - Object_newNull, - Object_newObject, - Object_endObject, - Object_newArray, - Object_endArray, - Object_newInteger, - Object_newLong, - Object_newDouble, - Object_releaseObject, - PyObject_Malloc, - PyObject_Free, - PyObject_Realloc, - }; - pyDecoder.dec = dec; - pyDecoder.curdim = 0; - pyDecoder.npyarr = NULL; - pyDecoder.npyarr_addr = NULL; - - decoder = (JSONObjectDecoder*) &pyDecoder; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|iiO&", kwlist, &sarg, &numpy, &labelled, PyArray_DescrConverter2, &dtype)) + PyObject *ret; + PyObject *sarg; + PyObject *arg; + PyObject *opreciseFloat = NULL; + JSONObjectDecoder *decoder; + PyObjectDecoder pyDecoder; + PyArray_Descr *dtype = NULL; + int numpy = 0, labelled = 0; + + JSONObjectDecoder dec = + { + Object_newString, + Object_objectAddKey, + Object_arrayAddItem, + Object_newTrue, + Object_newFalse, + Object_newNull, + Object_newObject, + Object_endObject, + Object_newArray, + Object_endArray, + Object_newInteger, + Object_newLong, + Object_newDouble, + Object_releaseObject, + PyObject_Malloc, + PyObject_Free, + PyObject_Realloc + }; + + dec.preciseFloat = 0; + dec.prv = NULL; + + pyDecoder.dec = dec; + pyDecoder.curdim = 0; + pyDecoder.npyarr = NULL; + pyDecoder.npyarr_addr = NULL; + + decoder = (JSONObjectDecoder*) &pyDecoder; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiiO&", g_kwlist, &arg, &opreciseFloat, &numpy, &labelled, PyArray_DescrConverter2, &dtype)) + { + Npy_releaseContext(pyDecoder.npyarr); + return NULL; + } + + if (opreciseFloat && PyObject_IsTrue(opreciseFloat)) + { + decoder->preciseFloat = 1; + } + + if (PyString_Check(arg)) + { + sarg = arg; + } + else + if (PyUnicode_Check(arg)) + { + sarg = PyUnicode_AsUTF8String(arg); + if (sarg == NULL) { - Npy_releaseContext(pyDecoder.npyarr); - return NULL; + //Exception raised above us by codec according to docs + return NULL; } + } + else + { + PyErr_Format(PyExc_TypeError, "Expected String or Unicode"); + return NULL; + } - if (PyUnicode_Check(sarg)) - { - sarg = PyUnicode_AsUTF8String(sarg); - if (sarg == NULL) - { - //Exception raised above us by codec according to docs - return NULL; - } - decref = 1; - } - else - if (!PyString_Check(sarg)) - { - PyErr_Format(PyExc_TypeError, "Expected String or Unicode"); - return NULL; - } + decoder->errorStr = NULL; + decoder->errorOffset = NULL; - if (numpy) + if (numpy) + { + pyDecoder.dtype = dtype; + decoder->newArray = Object_npyNewArray; + decoder->endArray = Object_npyEndArray; + decoder->arrayAddItem = Object_npyArrayAddItem; + + if (labelled) { - pyDecoder.dtype = dtype; - decoder->newArray = Object_npyNewArray; - decoder->endArray = Object_npyEndArray; - decoder->arrayAddItem = Object_npyArrayAddItem; - - if (labelled) - { - decoder->newObject = Object_npyNewObject; - decoder->endObject = Object_npyEndObject; - decoder->objectAddKey = Object_npyObjectAddKey; - } + decoder->newObject = Object_npyNewObject; + decoder->endObject = Object_npyEndObject; + decoder->objectAddKey = Object_npyObjectAddKey; } + } - decoder->errorStr = NULL; - decoder->errorOffset = NULL; + ret = JSON_DecodeObject(decoder, PyString_AS_STRING(sarg), PyString_GET_SIZE(sarg)); - PRINTMARK(); - ret = JSON_DecodeObject(decoder, PyString_AS_STRING(sarg), PyString_GET_SIZE(sarg)); - PRINTMARK(); + if (sarg != arg) + { + Py_DECREF(sarg); + } - if (decref) + if (PyErr_Occurred()) + { + if (ret) { - Py_DECREF(sarg); + Py_DECREF( (PyObject *) ret); } + Npy_releaseContext(pyDecoder.npyarr); + return NULL; + } - if (PyErr_Occurred()) - { - return NULL; - } + if (decoder->errorStr) + { + /* + FIXME: It's possible to give a much nicer error message here with actual failing element in input etc*/ - if (decoder->errorStr) - { - /*FIXME: It's possible to give a much nicer error message here with actual failing element in input etc*/ - PyErr_Format (PyExc_ValueError, "%s", decoder->errorStr); - Py_XDECREF( (PyObject *) ret); - Npy_releaseContext(pyDecoder.npyarr); + PyErr_Format (PyExc_ValueError, "%s", decoder->errorStr); - return NULL; + if (ret) + { + Py_DECREF( (PyObject *) ret); } + Npy_releaseContext(pyDecoder.npyarr); - return ret; + return NULL; + } + + return ret; } PyObject* JSONFileToObj(PyObject* self, PyObject *args, PyObject *kwargs) { - PyObject *file; - PyObject *read; - PyObject *string; - PyObject *result; - PyObject *argtuple; - - if (!PyArg_ParseTuple (args, "O", &file)) { - return NULL; - } - - if (!PyObject_HasAttrString (file, "read")) - { - PyErr_Format (PyExc_TypeError, "expected file"); - return NULL; - } + PyObject *read; + PyObject *string; + PyObject *result; + PyObject *file = NULL; + PyObject *argtuple; + + if (!PyArg_ParseTuple (args, "O", &file)) + { + return NULL; + } - read = PyObject_GetAttrString (file, "read"); + if (!PyObject_HasAttrString (file, "read")) + { + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } - if (!PyCallable_Check (read)) { - Py_XDECREF(read); - PyErr_Format (PyExc_TypeError, "expected file"); - return NULL; - } + read = PyObject_GetAttrString (file, "read"); - string = PyObject_CallObject (read, NULL); + if (!PyCallable_Check (read)) { Py_XDECREF(read); + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } - if (string == NULL) - { - return NULL; - } + string = PyObject_CallObject (read, NULL); + Py_XDECREF(read); - argtuple = PyTuple_Pack(1, string); + if (string == NULL) + { + return NULL; + } - result = JSONToObj (self, argtuple, kwargs); - Py_XDECREF(string); - Py_DECREF(argtuple); + argtuple = PyTuple_Pack(1, string); - if (result == NULL) { - return NULL; - } + result = JSONToObj (self, argtuple, kwargs); - return result; -} + Py_XDECREF(argtuple); + Py_XDECREF(string); + if (result == NULL) { + return NULL; + } + + return result; +} diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c index 4fdd8dc91ab04..040f86793d206 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/src/ujson/python/objToJSON.c @@ -1,3 +1,39 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. +*/ #define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY #include "py_defines.h" @@ -8,6 +44,9 @@ #include #include +#define EPOCH_ORD 719163 +static PyObject* type_decimal; + #define NPY_JSON_BUFSIZE 32768 static PyObject* cls_dataframe; @@ -16,55 +55,54 @@ static PyObject* cls_index; typedef void *(*PFN_PyTypeToJSON)(JSOBJ obj, JSONTypeContext *ti, void *outValue, size_t *_outLen); - #if (PY_VERSION_HEX < 0x02050000) typedef ssize_t Py_ssize_t; #endif typedef struct __NpyArrContext { - PyObject *array; - char* dataptr; - int was_datetime64; - int curdim; // current dimension in array's order - int stridedim; // dimension we are striding over - int inc; // stride dimension increment (+/- 1) - npy_intp dim; - npy_intp stride; - npy_intp ndim; - npy_intp index[NPY_MAXDIMS]; - PyArray_GetItemFunc* getitem; - - char** rowLabels; - char** columnLabels; + PyObject *array; + char* dataptr; + int was_datetime64; + int curdim; // current dimension in array's order + int stridedim; // dimension we are striding over + int inc; // stride dimension increment (+/- 1) + npy_intp dim; + npy_intp stride; + npy_intp ndim; + npy_intp index[NPY_MAXDIMS]; + PyArray_GetItemFunc* getitem; + + char** rowLabels; + char** columnLabels; } NpyArrContext; typedef struct __TypeContext { - JSPFN_ITERBEGIN iterBegin; - JSPFN_ITEREND iterEnd; - JSPFN_ITERNEXT iterNext; - JSPFN_ITERGETNAME iterGetName; - JSPFN_ITERGETVALUE iterGetValue; - PFN_PyTypeToJSON PyTypeToJSON; - PyObject *newObj; - PyObject *dictObj; - Py_ssize_t index; - Py_ssize_t size; - PyObject *itemValue; - PyObject *itemName; - PyObject *attrList; - char *citemName; - - JSINT64 longValue; - - NpyArrContext *npyarr; - int transpose; - char** rowLabels; - char** columnLabels; - npy_intp rowLabelsLen; - npy_intp columnLabelsLen; - + JSPFN_ITERBEGIN iterBegin; + JSPFN_ITEREND iterEnd; + JSPFN_ITERNEXT iterNext; + JSPFN_ITERGETNAME iterGetName; + JSPFN_ITERGETVALUE iterGetValue; + PFN_PyTypeToJSON PyTypeToJSON; + PyObject *newObj; + PyObject *dictObj; + Py_ssize_t index; + Py_ssize_t size; + PyObject *itemValue; + PyObject *itemName; + PyObject *attrList; + PyObject *iterator; + + JSINT64 longValue; + + char *citemName; + NpyArrContext *npyarr; + int transpose; + char** rowLabels; + char** columnLabels; + npy_intp rowLabelsLen; + npy_intp columnLabelsLen; } TypeContext; typedef struct __PyObjectEncoder @@ -83,18 +121,18 @@ typedef struct __PyObjectEncoder struct PyDictIterState { - PyObject *keys; - size_t i; - size_t sz; + PyObject *keys; + size_t i; + size_t sz; }; enum PANDAS_FORMAT { - SPLIT, - RECORDS, - INDEX, - COLUMNS, - VALUES + SPLIT, + RECORDS, + INDEX, + COLUMNS, + VALUES }; //#define PRINTMARK() fprintf(stderr, "%s: MARK(%d)\n", __FILE__, __LINE__) @@ -106,40 +144,45 @@ void initObjToJSON(void) int initObjToJSON(void) #endif { - PyObject *mod_frame; - PyDateTime_IMPORT; + PyObject* mod_decimal = PyImport_ImportModule("decimal"); + type_decimal = PyObject_GetAttrString(mod_decimal, "Decimal"); + Py_INCREF(type_decimal); + Py_DECREF(mod_decimal); - mod_frame = PyImport_ImportModule("pandas.core.frame"); - if (mod_frame) - { - cls_dataframe = PyObject_GetAttrString(mod_frame, "DataFrame"); - cls_index = PyObject_GetAttrString(mod_frame, "Index"); - cls_series = PyObject_GetAttrString(mod_frame, "Series"); - Py_DECREF(mod_frame); - } + PyDateTime_IMPORT; + PyObject *mod_frame; + + mod_frame = PyImport_ImportModule("pandas.core.frame"); + if (mod_frame) + { + cls_dataframe = PyObject_GetAttrString(mod_frame, "DataFrame"); + cls_index = PyObject_GetAttrString(mod_frame, "Index"); + cls_series = PyObject_GetAttrString(mod_frame, "Series"); + Py_DECREF(mod_frame); + } - /* Initialise numpy API */ - import_array(); + /* Initialise numpy API */ + import_array(); } static void *PyIntToINT32(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { - PyObject *obj = (PyObject *) _obj; - *((JSINT32 *) outValue) = PyInt_AS_LONG (obj); - return NULL; + PyObject *obj = (PyObject *) _obj; + *((JSINT32 *) outValue) = PyInt_AS_LONG (obj); + return NULL; } static void *PyIntToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { - PyObject *obj = (PyObject *) _obj; - *((JSINT64 *) outValue) = PyInt_AS_LONG (obj); - return NULL; + PyObject *obj = (PyObject *) _obj; + *((JSINT64 *) outValue) = PyInt_AS_LONG (obj); + return NULL; } static void *PyLongToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { - *((JSINT64 *) outValue) = GET_TC(tc)->longValue; - return NULL; + *((JSINT64 *) outValue) = GET_TC(tc)->longValue; + return NULL; } static void *NpyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) @@ -151,27 +194,27 @@ static void *NpyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, s static void *PyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { - PyObject *obj = (PyObject *) _obj; - *((double *) outValue) = PyFloat_AS_DOUBLE (obj); - return NULL; + PyObject *obj = (PyObject *) _obj; + *((double *) outValue) = PyFloat_AsDouble (obj); + return NULL; } static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { - PyObject *obj = (PyObject *) _obj; - *_outLen = PyString_GET_SIZE(obj); - return PyString_AS_STRING(obj); + PyObject *obj = (PyObject *) _obj; + *_outLen = PyString_GET_SIZE(obj); + return PyString_AS_STRING(obj); } static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { - PyObject *obj = (PyObject *) _obj; - PyObject *newObj = PyUnicode_AsUTF8String (obj); + PyObject *obj = (PyObject *) _obj; + PyObject *newObj = PyUnicode_EncodeUTF8 (PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj), NULL); - GET_TC(tc)->newObj = newObj; + GET_TC(tc)->newObj = newObj; - *_outLen = PyString_GET_SIZE(newObj); - return PyString_AS_STRING(newObj); + *_outLen = PyString_GET_SIZE(newObj); + return PyString_AS_STRING(newObj); } static void *NpyDateTimeToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) @@ -183,32 +226,32 @@ static void *NpyDateTimeToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, static void *PyDateTimeToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { - pandas_datetimestruct dts; - PyObject *obj = (PyObject *) _obj; + pandas_datetimestruct dts; + PyObject *obj = (PyObject *) _obj; - dts.year = PyDateTime_GET_YEAR(obj); - dts.month = PyDateTime_GET_MONTH(obj); - dts.day = PyDateTime_GET_DAY(obj); - dts.hour = PyDateTime_DATE_GET_HOUR(obj); - dts.min = PyDateTime_DATE_GET_MINUTE(obj); - dts.sec = PyDateTime_DATE_GET_SECOND(obj); - dts.us = PyDateTime_DATE_GET_MICROSECOND(obj); - dts.ps = dts.as = 0; - *((JSINT64*)outValue) = (JSINT64) pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts); - return NULL; + dts.year = PyDateTime_GET_YEAR(obj); + dts.month = PyDateTime_GET_MONTH(obj); + dts.day = PyDateTime_GET_DAY(obj); + dts.hour = PyDateTime_DATE_GET_HOUR(obj); + dts.min = PyDateTime_DATE_GET_MINUTE(obj); + dts.sec = PyDateTime_DATE_GET_SECOND(obj); + dts.us = PyDateTime_DATE_GET_MICROSECOND(obj); + dts.ps = dts.as = 0; + *((JSINT64*)outValue) = (JSINT64) pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts); + return NULL; } static void *PyDateToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { - pandas_datetimestruct dts; - PyObject *obj = (PyObject *) _obj; + pandas_datetimestruct dts; + PyObject *obj = (PyObject *) _obj; - dts.year = PyDateTime_GET_YEAR(obj); - dts.month = PyDateTime_GET_MONTH(obj); - dts.day = PyDateTime_GET_DAY(obj); - dts.hour = dts.min = dts.sec = dts.ps = dts.as = 0; - *((JSINT64*)outValue) = (JSINT64) pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts); - return NULL; + dts.year = PyDateTime_GET_YEAR(obj); + dts.month = PyDateTime_GET_MONTH(obj); + dts.day = PyDateTime_GET_DAY(obj); + dts.hour = dts.min = dts.sec = dts.ps = dts.as = 0; + *((JSINT64*)outValue) = (JSINT64) pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts); + return NULL; } //============================================================================= @@ -216,200 +259,200 @@ static void *PyDateToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size //============================================================================= int NpyArr_iterNextNone(JSOBJ _obj, JSONTypeContext *tc) { - return 0; + return 0; } void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { - PyArrayObject *obj; - PyArray_Descr *dtype; - NpyArrContext *npyarr; + PyArrayObject *obj; + PyArray_Descr *dtype; + NpyArrContext *npyarr; + + if (GET_TC(tc)->newObj) + { + obj = (PyArrayObject *) GET_TC(tc)->newObj; + } + else + { + obj = (PyArrayObject *) _obj; + } + + if (PyArray_SIZE(obj) > 0) + { + PRINTMARK(); + npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + GET_TC(tc)->npyarr = npyarr; - if (GET_TC(tc)->newObj) + if (!npyarr) { - obj = (PyArrayObject *) GET_TC(tc)->newObj; - } - else - { - obj = (PyArrayObject *) _obj; + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; } - if (PyArray_SIZE(obj) > 0) - { - PRINTMARK(); - npyarr = PyObject_Malloc(sizeof(NpyArrContext)); - GET_TC(tc)->npyarr = npyarr; - - if (!npyarr) - { - PyErr_NoMemory(); - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - - // uber hack to support datetime64[ns] arrays - if (PyArray_DESCR(obj)->type_num == NPY_DATETIME) { - npyarr->was_datetime64 = 1; - dtype = PyArray_DescrFromType(NPY_INT64); - obj = (PyArrayObject *) PyArray_CastToType(obj, dtype, 0); - } else { - npyarr->was_datetime64 = 0; - } + // uber hack to support datetime64[ns] arrays + if (PyArray_DESCR(obj)->type_num == NPY_DATETIME) { + npyarr->was_datetime64 = 1; + dtype = PyArray_DescrFromType(NPY_INT64); + obj = (PyArrayObject *) PyArray_CastToType(obj, dtype, 0); + } else { + npyarr->was_datetime64 = 0; + } - npyarr->array = (PyObject*) obj; - npyarr->getitem = (PyArray_GetItemFunc*) PyArray_DESCR(obj)->f->getitem; - npyarr->dataptr = PyArray_DATA(obj); - npyarr->ndim = PyArray_NDIM(obj) - 1; - npyarr->curdim = 0; + npyarr->array = (PyObject*) obj; + npyarr->getitem = (PyArray_GetItemFunc*) PyArray_DESCR(obj)->f->getitem; + npyarr->dataptr = PyArray_DATA(obj); + npyarr->ndim = PyArray_NDIM(obj) - 1; + npyarr->curdim = 0; - if (GET_TC(tc)->transpose) - { - npyarr->dim = PyArray_DIM(obj, npyarr->ndim); - npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); - npyarr->stridedim = npyarr->ndim; - npyarr->index[npyarr->ndim] = 0; - npyarr->inc = -1; - } - else - { - npyarr->dim = PyArray_DIM(obj, 0); - npyarr->stride = PyArray_STRIDE(obj, 0); - npyarr->stridedim = 0; - npyarr->index[0] = 0; - npyarr->inc = 1; - } - - npyarr->columnLabels = GET_TC(tc)->columnLabels; - npyarr->rowLabels = GET_TC(tc)->rowLabels; + if (GET_TC(tc)->transpose) + { + npyarr->dim = PyArray_DIM(obj, npyarr->ndim); + npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); + npyarr->stridedim = npyarr->ndim; + npyarr->index[npyarr->ndim] = 0; + npyarr->inc = -1; } else { - GET_TC(tc)->iterNext = NpyArr_iterNextNone; + npyarr->dim = PyArray_DIM(obj, 0); + npyarr->stride = PyArray_STRIDE(obj, 0); + npyarr->stridedim = 0; + npyarr->index[0] = 0; + npyarr->inc = 1; } - PRINTMARK(); + + npyarr->columnLabels = GET_TC(tc)->columnLabels; + npyarr->rowLabels = GET_TC(tc)->rowLabels; + } + else + { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + } + PRINTMARK(); } void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - - if (npyarr) - { - if (npyarr->was_datetime64) { - Py_XDECREF(npyarr->array); - } + NpyArrContext *npyarr = GET_TC(tc)->npyarr; - if (GET_TC(tc)->itemValue != npyarr->array) - { - Py_XDECREF(GET_TC(tc)->itemValue); - } - GET_TC(tc)->itemValue = NULL; + if (npyarr) + { + if (npyarr->was_datetime64) { + Py_XDECREF(npyarr->array); + } - PyObject_Free(npyarr); + if (GET_TC(tc)->itemValue != npyarr->array) + { + Py_XDECREF(GET_TC(tc)->itemValue); } - PRINTMARK(); + GET_TC(tc)->itemValue = NULL; + + PyObject_Free(npyarr); + } + PRINTMARK(); } void NpyArrPassThru_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - PRINTMARK(); + PRINTMARK(); } void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - NpyArrContext* npyarr; - PRINTMARK(); - // finished this dimension, reset the data pointer - npyarr = GET_TC(tc)->npyarr; - npyarr->curdim--; - npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; - npyarr->stridedim -= npyarr->inc; - npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); - npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); - npyarr->dataptr += npyarr->stride; - - if (GET_TC(tc)->itemValue != npyarr->array) - { - Py_XDECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } + NpyArrContext* npyarr; + PRINTMARK(); + // finished this dimension, reset the data pointer + npyarr = GET_TC(tc)->npyarr; + npyarr->curdim--; + npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; + npyarr->stridedim -= npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->dataptr += npyarr->stride; + + if (GET_TC(tc)->itemValue != npyarr->array) + { + Py_XDECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } } int NpyArr_iterNextItem(JSOBJ _obj, JSONTypeContext *tc) { - NpyArrContext* npyarr; - PRINTMARK(); - npyarr = GET_TC(tc)->npyarr; + NpyArrContext* npyarr; + PRINTMARK(); + npyarr = GET_TC(tc)->npyarr; - if (GET_TC(tc)->itemValue != npyarr->array) - { - Py_XDECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } + if (GET_TC(tc)->itemValue != npyarr->array) + { + Py_XDECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } - if (npyarr->index[npyarr->stridedim] >= npyarr->dim) - { - return 0; - } + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) + { + return 0; + } - GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); + GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); - npyarr->dataptr += npyarr->stride; - npyarr->index[npyarr->stridedim]++; - return 1; + npyarr->dataptr += npyarr->stride; + npyarr->index[npyarr->stridedim]++; + return 1; } int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) { - NpyArrContext* npyarr; - PRINTMARK(); - npyarr = GET_TC(tc)->npyarr; + NpyArrContext* npyarr; + PRINTMARK(); + npyarr = GET_TC(tc)->npyarr; - if (npyarr->curdim >= npyarr->ndim || npyarr->index[npyarr->stridedim] >= npyarr->dim) - { - // innermost dimension, start retrieving item values - GET_TC(tc)->iterNext = NpyArr_iterNextItem; - return NpyArr_iterNextItem(_obj, tc); - } + if (npyarr->curdim >= npyarr->ndim || npyarr->index[npyarr->stridedim] >= npyarr->dim) + { + // innermost dimension, start retrieving item values + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + return NpyArr_iterNextItem(_obj, tc); + } - // dig a dimension deeper - npyarr->index[npyarr->stridedim]++; + // dig a dimension deeper + npyarr->index[npyarr->stridedim]++; - npyarr->curdim++; - npyarr->stridedim += npyarr->inc; - npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); - npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); - npyarr->index[npyarr->stridedim] = 0; + npyarr->curdim++; + npyarr->stridedim += npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->index[npyarr->stridedim] = 0; - ((PyObjectEncoder*) tc->encoder)->npyCtxtPassthru = npyarr; - GET_TC(tc)->itemValue = npyarr->array; - return 1; + ((PyObjectEncoder*) tc->encoder)->npyCtxtPassthru = npyarr; + GET_TC(tc)->itemValue = npyarr->array; + return 1; } JSOBJ NpyArr_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - PRINTMARK(); - return GET_TC(tc)->itemValue; + PRINTMARK(); + return GET_TC(tc)->itemValue; } char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { - NpyArrContext* npyarr; - npy_intp idx; - PRINTMARK(); - npyarr = GET_TC(tc)->npyarr; - if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) - { - idx = npyarr->index[npyarr->stridedim] - 1; - *outLen = strlen(npyarr->columnLabels[idx]); - return npyarr->columnLabels[idx]; - } - else - { - idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; - *outLen = strlen(npyarr->rowLabels[idx]); - return npyarr->rowLabels[idx]; - } + NpyArrContext* npyarr; + npy_intp idx; + PRINTMARK(); + npyarr = GET_TC(tc)->npyarr; + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) + { + idx = npyarr->index[npyarr->stridedim] - 1; + *outLen = strlen(npyarr->columnLabels[idx]); + return npyarr->columnLabels[idx]; + } + else + { + idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; + *outLen = strlen(npyarr->rowLabels[idx]); + return npyarr->rowLabels[idx]; + } } //============================================================================= @@ -418,25 +461,25 @@ char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) //============================================================================= void Tuple_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->index = 0; - GET_TC(tc)->size = PyTuple_GET_SIZE( (PyObject *) obj); - GET_TC(tc)->itemValue = NULL; + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyTuple_GET_SIZE( (PyObject *) obj); + GET_TC(tc)->itemValue = NULL; } int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) { - PyObject *item; + PyObject *item; - if (GET_TC(tc)->index >= GET_TC(tc)->size) - { - return 0; - } + if (GET_TC(tc)->index >= GET_TC(tc)->size) + { + return 0; + } - item = PyTuple_GET_ITEM (obj, GET_TC(tc)->index); + item = PyTuple_GET_ITEM (obj, GET_TC(tc)->index); - GET_TC(tc)->itemValue = item; - GET_TC(tc)->index ++; - return 1; + GET_TC(tc)->itemValue = item; + GET_TC(tc)->index ++; + return 1; } void Tuple_iterEnd(JSOBJ obj, JSONTypeContext *tc) @@ -445,12 +488,68 @@ void Tuple_iterEnd(JSOBJ obj, JSONTypeContext *tc) JSOBJ Tuple_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; + return GET_TC(tc)->itemValue; } char *Tuple_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { - return NULL; + return NULL; +} + +//============================================================================= +// Iterator iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= +void Iter_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->itemValue = NULL; + GET_TC(tc)->iterator = PyObject_GetIter(obj); +} + +int Iter_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + PyObject *item; + + if (GET_TC(tc)->itemValue) + { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + item = PyIter_Next(GET_TC(tc)->iterator); + + if (item == NULL) + { + return 0; + } + + GET_TC(tc)->itemValue = item; + return 1; +} + +void Iter_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->itemValue) + { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->iterator) + { + Py_DECREF(GET_TC(tc)->iterator); + GET_TC(tc)->iterator = NULL; + } +} + +JSOBJ Iter_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *Iter_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + return NULL; } //============================================================================= @@ -460,97 +559,84 @@ char *Tuple_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) //============================================================================= void Dir_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->attrList = PyObject_Dir(obj); - GET_TC(tc)->index = 0; - GET_TC(tc)->size = PyList_GET_SIZE(GET_TC(tc)->attrList); - PRINTMARK(); + GET_TC(tc)->attrList = PyObject_Dir(obj); + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE(GET_TC(tc)->attrList); + PRINTMARK(); } void Dir_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - if (GET_TC(tc)->itemValue) - { - Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } + if (GET_TC(tc)->itemValue) + { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } - if (GET_TC(tc)->itemName) - { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = NULL; - } + if (GET_TC(tc)->itemName) + { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } - Py_DECREF( (PyObject *) GET_TC(tc)->attrList); - PRINTMARK(); + Py_DECREF( (PyObject *) GET_TC(tc)->attrList); + PRINTMARK(); } int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { - PyObject *obj = (PyObject *) _obj; - PyObject *itemValue = GET_TC(tc)->itemValue; - PyObject *itemName = GET_TC(tc)->itemName; - PyObject* attr; - PyObject* attrName; - char* attrStr; - + PyObject *obj = (PyObject *) _obj; + PyObject *itemValue = GET_TC(tc)->itemValue; + PyObject *itemName = GET_TC(tc)->itemName; + PyObject* attr; + PyObject* attrName; + char* attrStr; + + if (itemValue) + { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = itemValue = NULL; + } + + if (itemName) + { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = itemName = NULL; + } + + for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index ++) + { + attrName = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); +#if PY_MAJOR_VERSION >= 3 + attr = PyUnicode_AsUTF8String(attrName); +#else + attr = attrName; + Py_INCREF(attr); +#endif + attrStr = PyString_AS_STRING(attr); - if (itemValue) + if (attrStr[0] == '_') { - Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = itemValue = NULL; + PRINTMARK(); + Py_DECREF(attr); + continue; } - if (itemName) + itemValue = PyObject_GetAttr(obj, attrName); + if (itemValue == NULL) { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = itemName = NULL; + PyErr_Clear(); + Py_DECREF(attr); + PRINTMARK(); + continue; } - for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index ++) + if (PyCallable_Check(itemValue)) { - attrName = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); -#if PY_MAJOR_VERSION >= 3 - attr = PyUnicode_AsUTF8String(attrName); -#else - attr = attrName; - Py_INCREF(attr); -#endif - attrStr = PyString_AS_STRING(attr); - - if (attrStr[0] == '_') - { - PRINTMARK(); - Py_DECREF(attr); - continue; - } - - itemValue = PyObject_GetAttr(obj, attrName); - if (itemValue == NULL) - { - PyErr_Clear(); - Py_DECREF(attr); - PRINTMARK(); - continue; - } - - if (PyCallable_Check(itemValue)) - { - Py_DECREF(itemValue); - Py_DECREF(attr); - PRINTMARK(); - continue; - } - - PRINTMARK(); - itemName = attr; - break; - } - - if (itemName == NULL) - { - GET_TC(tc)->index = GET_TC(tc)->size; - GET_TC(tc)->itemValue = NULL; - return 0; + Py_DECREF(itemValue); + Py_DECREF(attr); + PRINTMARK(); + continue; } GET_TC(tc)->itemName = itemName; @@ -558,48 +644,60 @@ int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) GET_TC(tc)->index ++; PRINTMARK(); - return 1; -} + itemName = attr; + break; + } + if (itemName == NULL) + { + GET_TC(tc)->index = GET_TC(tc)->size; + GET_TC(tc)->itemValue = NULL; + return 0; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + GET_TC(tc)->index ++; + PRINTMARK(); + return 1; +} JSOBJ Dir_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - PRINTMARK(); - return GET_TC(tc)->itemValue; + PRINTMARK(); + return GET_TC(tc)->itemValue; } char *Dir_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { - PRINTMARK(); - *outLen = PyString_GET_SIZE(GET_TC(tc)->itemName); - return PyString_AS_STRING(GET_TC(tc)->itemName); + PRINTMARK(); + *outLen = PyString_GET_SIZE(GET_TC(tc)->itemName); + return PyString_AS_STRING(GET_TC(tc)->itemName); } - - //============================================================================= // List iteration functions // itemValue is borrowed from object (which is list). No refcounting //============================================================================= void List_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->index = 0; - GET_TC(tc)->size = PyList_GET_SIZE( (PyObject *) obj); + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE( (PyObject *) obj); } int List_iterNext(JSOBJ obj, JSONTypeContext *tc) { - if (GET_TC(tc)->index >= GET_TC(tc)->size) - { - PRINTMARK(); - return 0; - } + if (GET_TC(tc)->index >= GET_TC(tc)->size) + { + PRINTMARK(); + return 0; + } - GET_TC(tc)->itemValue = PyList_GET_ITEM (obj, GET_TC(tc)->index); - GET_TC(tc)->index ++; - return 1; + GET_TC(tc)->itemValue = PyList_GET_ITEM (obj, GET_TC(tc)->index); + GET_TC(tc)->index ++; + return 1; } void List_iterEnd(JSOBJ obj, JSONTypeContext *tc) @@ -608,12 +706,12 @@ void List_iterEnd(JSOBJ obj, JSONTypeContext *tc) JSOBJ List_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; + return GET_TC(tc)->itemValue; } char *List_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { - return NULL; + return NULL; } //============================================================================= @@ -621,65 +719,65 @@ char *List_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) //============================================================================= void Index_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->index = 0; - GET_TC(tc)->citemName = PyObject_Malloc(20 * sizeof(char)); - if (!GET_TC(tc)->citemName) - { - PyErr_NoMemory(); - } - PRINTMARK(); + GET_TC(tc)->index = 0; + GET_TC(tc)->citemName = PyObject_Malloc(20 * sizeof(char)); + if (!GET_TC(tc)->citemName) + { + PyErr_NoMemory(); + } + PRINTMARK(); } int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) { - Py_ssize_t index; - if (!GET_TC(tc)->citemName) - { - return 0; - } - - index = GET_TC(tc)->index; - Py_XDECREF(GET_TC(tc)->itemValue); - if (index == 0) - { - memcpy(GET_TC(tc)->citemName, "name", sizeof(char)*5); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); - } - else + Py_ssize_t index; + if (!GET_TC(tc)->citemName) + { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) + { + memcpy(GET_TC(tc)->citemName, "name", sizeof(char)*5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } + else if (index == 1) { - memcpy(GET_TC(tc)->citemName, "data", sizeof(char)*5); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + memcpy(GET_TC(tc)->citemName, "data", sizeof(char)*5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); } else { - PRINTMARK(); - return 0; + PRINTMARK(); + return 0; } - GET_TC(tc)->index++; - PRINTMARK(); - return 1; + GET_TC(tc)->index++; + PRINTMARK(); + return 1; } void Index_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - if (GET_TC(tc)->citemName) - { - PyObject_Free(GET_TC(tc)->citemName); - } - PRINTMARK(); + if (GET_TC(tc)->citemName) + { + PyObject_Free(GET_TC(tc)->citemName); + } + PRINTMARK(); } JSOBJ Index_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; + return GET_TC(tc)->itemValue; } char *Index_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { - *outLen = strlen(GET_TC(tc)->citemName); - return GET_TC(tc)->citemName; + *outLen = strlen(GET_TC(tc)->citemName); + return GET_TC(tc)->citemName; } //============================================================================= @@ -687,75 +785,75 @@ char *Index_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) //============================================================================= void Series_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; - GET_TC(tc)->index = 0; - GET_TC(tc)->citemName = PyObject_Malloc(20 * sizeof(char)); - enc->outputFormat = VALUES; // for contained series - if (!GET_TC(tc)->citemName) - { - PyErr_NoMemory(); - } - PRINTMARK(); + PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->citemName = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series + if (!GET_TC(tc)->citemName) + { + PyErr_NoMemory(); + } + PRINTMARK(); } int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) { - Py_ssize_t index; - if (!GET_TC(tc)->citemName) - { - return 0; - } - - index = GET_TC(tc)->index; - Py_XDECREF(GET_TC(tc)->itemValue); - if (index == 0) - { - memcpy(GET_TC(tc)->citemName, "name", sizeof(char)*5); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); - } - else - if (index == 1) - { - memcpy(GET_TC(tc)->citemName, "index", sizeof(char)*6); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); - } - else - if (index == 2) - { - memcpy(GET_TC(tc)->citemName, "data", sizeof(char)*5); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); - } - else - { - PRINTMARK(); - return 0; - } - - GET_TC(tc)->index++; + Py_ssize_t index; + if (!GET_TC(tc)->citemName) + { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) + { + memcpy(GET_TC(tc)->citemName, "name", sizeof(char)*5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } + else + if (index == 1) + { + memcpy(GET_TC(tc)->citemName, "index", sizeof(char)*6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } + else + if (index == 2) + { + memcpy(GET_TC(tc)->citemName, "data", sizeof(char)*5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + } + else + { PRINTMARK(); - return 1; + return 0; + } + + GET_TC(tc)->index++; + PRINTMARK(); + return 1; } void Series_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; - enc->outputFormat = enc->originalOutputFormat; - if (GET_TC(tc)->citemName) - { - PyObject_Free(GET_TC(tc)->citemName); - } - PRINTMARK(); + PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; + enc->outputFormat = enc->originalOutputFormat; + if (GET_TC(tc)->citemName) + { + PyObject_Free(GET_TC(tc)->citemName); + } + PRINTMARK(); } JSOBJ Series_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; + return GET_TC(tc)->itemValue; } char *Series_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { - *outLen = strlen(GET_TC(tc)->citemName); - return GET_TC(tc)->citemName; + *outLen = strlen(GET_TC(tc)->citemName); + return GET_TC(tc)->citemName; } //============================================================================= @@ -763,75 +861,75 @@ char *Series_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) //============================================================================= void DataFrame_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; - GET_TC(tc)->index = 0; - GET_TC(tc)->citemName = PyObject_Malloc(20 * sizeof(char)); - enc->outputFormat = VALUES; // for contained series & index - if (!GET_TC(tc)->citemName) - { - PyErr_NoMemory(); - } - PRINTMARK(); + PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->citemName = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series & index + if (!GET_TC(tc)->citemName) + { + PyErr_NoMemory(); + } + PRINTMARK(); } int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) { - Py_ssize_t index; - if (!GET_TC(tc)->citemName) - { - return 0; - } - - index = GET_TC(tc)->index; - Py_XDECREF(GET_TC(tc)->itemValue); - if (index == 0) - { - memcpy(GET_TC(tc)->citemName, "columns", sizeof(char)*8); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); - } - else + Py_ssize_t index; + if (!GET_TC(tc)->citemName) + { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) + { + memcpy(GET_TC(tc)->citemName, "columns", sizeof(char)*8); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); + } + else if (index == 1) { - memcpy(GET_TC(tc)->citemName, "index", sizeof(char)*6); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + memcpy(GET_TC(tc)->citemName, "index", sizeof(char)*6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); } else - if (index == 2) - { + if (index == 2) + { memcpy(GET_TC(tc)->citemName, "data", sizeof(char)*5); GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); - } - else - { + } + else + { PRINTMARK(); return 0; - } + } - GET_TC(tc)->index++; - PRINTMARK(); - return 1; + GET_TC(tc)->index++; + PRINTMARK(); + return 1; } void DataFrame_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; - enc->outputFormat = enc->originalOutputFormat; - if (GET_TC(tc)->citemName) - { - PyObject_Free(GET_TC(tc)->citemName); - } - PRINTMARK(); + PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; + enc->outputFormat = enc->originalOutputFormat; + if (GET_TC(tc)->citemName) + { + PyObject_Free(GET_TC(tc)->citemName); + } + PRINTMARK(); } JSOBJ DataFrame_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; + return GET_TC(tc)->itemValue; } char *DataFrame_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { - *outLen = strlen(GET_TC(tc)->citemName); - return GET_TC(tc)->citemName; + *outLen = strlen(GET_TC(tc)->citemName); + return GET_TC(tc)->citemName; } //============================================================================= @@ -841,46 +939,46 @@ char *DataFrame_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) //============================================================================= void Dict_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->index = 0; - PRINTMARK(); + GET_TC(tc)->index = 0; + PRINTMARK(); } int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc) { #if PY_MAJOR_VERSION >= 3 - PyObject* itemNameTmp; + PyObject* itemNameTmp; #endif - if (GET_TC(tc)->itemName) - { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = NULL; - } + if (GET_TC(tc)->itemName) + { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } - if (!PyDict_Next ( (PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index, &GET_TC(tc)->itemName, &GET_TC(tc)->itemValue)) - { - PRINTMARK(); - return 0; - } + if (!PyDict_Next ( (PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index, &GET_TC(tc)->itemName, &GET_TC(tc)->itemValue)) + { + PRINTMARK(); + return 0; + } - if (PyUnicode_Check(GET_TC(tc)->itemName)) - { - GET_TC(tc)->itemName = PyUnicode_AsUTF8String (GET_TC(tc)->itemName); - } - else + if (PyUnicode_Check(GET_TC(tc)->itemName)) + { + GET_TC(tc)->itemName = PyUnicode_AsUTF8String (GET_TC(tc)->itemName); + } + else if (!PyString_Check(GET_TC(tc)->itemName)) { - GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName); #if PY_MAJOR_VERSION >= 3 - itemNameTmp = GET_TC(tc)->itemName; - GET_TC(tc)->itemName = PyUnicode_AsUTF8String (GET_TC(tc)->itemName); - Py_DECREF(itemNameTmp); + itemNameTmp = GET_TC(tc)->itemName; + GET_TC(tc)->itemName = PyUnicode_AsUTF8String (GET_TC(tc)->itemName); + Py_DECREF(itemNameTmp); #endif } else { - Py_INCREF(GET_TC(tc)->itemName); + Py_INCREF(GET_TC(tc)->itemName); } PRINTMARK(); return 1; @@ -888,24 +986,24 @@ int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc) void Dict_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - if (GET_TC(tc)->itemName) - { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = NULL; - } - Py_DECREF(GET_TC(tc)->dictObj); - PRINTMARK(); + if (GET_TC(tc)->itemName) + { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + Py_DECREF(GET_TC(tc)->dictObj); + PRINTMARK(); } JSOBJ Dict_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; + return GET_TC(tc)->itemValue; } char *Dict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { - *outLen = PyString_GET_SIZE(GET_TC(tc)->itemName); - return PyString_AS_STRING(GET_TC(tc)->itemName); + *outLen = PyString_GET_SIZE(GET_TC(tc)->itemName); + return PyString_AS_STRING(GET_TC(tc)->itemName); } void NpyArr_freeLabels(char** labels, npy_intp len) @@ -1023,433 +1121,456 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc) { - PyObject *obj, *exc, *toDictFunc; - TypeContext *pc; - PyObjectEncoder *enc; - double val; - PRINTMARK(); - if (!_obj) { - tc->type = JT_INVALID; - return; - } + PyObject *obj, *exc, *toDictFunc; + TypeContext *pc; + PyObjectEncoder *enc; + double val; + PRINTMARK(); + if (!_obj) { + tc->type = JT_INVALID; + return; + } - obj = (PyObject*) _obj; - enc = (PyObjectEncoder*) tc->encoder; + obj = (PyObject*) _obj; + enc = (PyObjectEncoder*) tc->encoder; - tc->prv = PyObject_Malloc(sizeof(TypeContext)); - pc = (TypeContext *) tc->prv; - if (!pc) - { - tc->type = JT_INVALID; - PyErr_NoMemory(); - return; - } - pc->newObj = NULL; - pc->dictObj = NULL; - pc->itemValue = NULL; - pc->itemName = NULL; - pc->attrList = NULL; - pc->citemName = NULL; - pc->npyarr = NULL; - pc->rowLabels = NULL; - pc->columnLabels = NULL; - pc->index = 0; - pc->size = 0; - pc->longValue = 0; - pc->transpose = 0; - pc->rowLabelsLen = 0; - pc->columnLabelsLen = 0; - - if (PyIter_Check(obj) || PyArray_Check(obj)) - { - goto ISITERABLE; - } + tc->prv = PyObject_Malloc(sizeof(TypeContext)); + pc = (TypeContext *) tc->prv; + if (!pc) + { + tc->type = JT_INVALID; + PyErr_NoMemory(); + return; + } + pc->newObj = NULL; + pc->dictObj = NULL; + pc->itemValue = NULL; + pc->itemName = NULL; + pc->attrList = NULL; + pc->index = 0; + pc->size = 0; + pc->longValue = 0; + pc->citemName = NULL; + pc->npyarr = NULL; + pc->rowLabels = NULL; + pc->columnLabels = NULL; + pc->transpose = 0; + pc->rowLabelsLen = 0; + pc->columnLabelsLen = 0; + + if (PyIter_Check(obj)) + { + PRINTMARK(); + goto ISITERABLE; + } - if (PyBool_Check(obj)) - { - PRINTMARK(); - tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE; - return; - } - else - if (PyLong_Check(obj)) - { - PRINTMARK(); - pc->PyTypeToJSON = PyLongToINT64; - tc->type = JT_LONG; - GET_TC(tc)->longValue = PyLong_AsLongLong(obj); + if (PyIter_Check(obj) || PyArray_Check(obj)) + { + goto ISITERABLE; + } - exc = PyErr_Occurred(); + if (PyBool_Check(obj)) + { + PRINTMARK(); + tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE; + return; + } + else + if (PyLong_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyLongToINT64; + tc->type = JT_LONG; + GET_TC(tc)->longValue = PyLong_AsLongLong(obj); - if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) - { - PRINTMARK(); - goto INVALID; - } + exc = PyErr_Occurred(); - return; - } - else - if (PyInt_Check(obj)) + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { - PRINTMARK(); + PRINTMARK(); + goto INVALID; + } + + return; + } + else + if (PyInt_Check(obj)) + { + PRINTMARK(); #ifdef _LP64 - pc->PyTypeToJSON = PyIntToINT64; tc->type = JT_LONG; + pc->PyTypeToJSON = PyIntToINT64; tc->type = JT_LONG; #else - pc->PyTypeToJSON = PyIntToINT32; tc->type = JT_INT; + pc->PyTypeToJSON = PyIntToINT32; tc->type = JT_INT; #endif - return; - } - else - if (PyArray_IsScalar(obj, Integer)) - { - PRINTMARK(); - pc->PyTypeToJSON = PyLongToINT64; - tc->type = JT_LONG; - PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), PyArray_DescrFromType(NPY_INT64)); - - exc = PyErr_Occurred(); + return; + } + else + if (PyArray_IsScalar(obj, Integer)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyLongToINT64; + tc->type = JT_LONG; + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), PyArray_DescrFromType(NPY_INT64)); - if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) - { - PRINTMARK(); - goto INVALID; - } + exc = PyErr_Occurred(); - return; - } - else - if (PyString_Check(obj)) - { - PRINTMARK(); - pc->PyTypeToJSON = PyStringToUTF8; tc->type = JT_UTF8; - return; - } - else - if (PyUnicode_Check(obj)) - { - PRINTMARK(); - pc->PyTypeToJSON = PyUnicodeToUTF8; tc->type = JT_UTF8; - return; - } - else - if (PyFloat_Check(obj)) - { - PRINTMARK(); - val = PyFloat_AS_DOUBLE (obj); - if (npy_isnan(val) || npy_isinf(val)) - { - tc->type = JT_NULL; - } - else - { - pc->PyTypeToJSON = PyFloatToDOUBLE; tc->type = JT_DOUBLE; - } - return; - } - else - if (PyArray_IsScalar(obj, Float)) - { - PRINTMARK(); - pc->PyTypeToJSON = NpyFloatToDOUBLE; tc->type = JT_DOUBLE; - return; - } - else - if (PyArray_IsScalar(obj, Datetime)) + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) { - PRINTMARK(); - pc->PyTypeToJSON = NpyDateTimeToINT64; tc->type = JT_LONG; - return; + PRINTMARK(); + goto INVALID; } - else - if (PyDateTime_Check(obj)) - { - PRINTMARK(); - pc->PyTypeToJSON = PyDateTimeToINT64; tc->type = JT_LONG; - return; - } - else - if (PyDate_Check(obj)) + + return; + } + else + if (PyString_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyStringToUTF8; tc->type = JT_UTF8; + return; + } + else + if (PyUnicode_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyUnicodeToUTF8; tc->type = JT_UTF8; + return; + } + else + if (PyFloat_Check(obj)) + { + PRINTMARK(); + val = PyFloat_AS_DOUBLE (obj); + if (npy_isnan(val) || npy_isinf(val)) { - PRINTMARK(); - pc->PyTypeToJSON = PyDateToINT64; tc->type = JT_LONG; - return; + tc->type = JT_NULL; } else - if (obj == Py_None) { - PRINTMARK(); - tc->type = JT_NULL; - return; + pc->PyTypeToJSON = PyFloatToDOUBLE; tc->type = JT_DOUBLE; } + return; + } + else + if (PyObject_IsInstance(obj, type_decimal)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyFloatToDOUBLE; tc->type = JT_DOUBLE; + return; + } + else + if (PyArray_IsScalar(obj, Float)) + { + PRINTMARK(); + pc->PyTypeToJSON = NpyFloatToDOUBLE; tc->type = JT_DOUBLE; + return; + } + else + if (PyArray_IsScalar(obj, Datetime)) + { + PRINTMARK(); + pc->PyTypeToJSON = NpyDateTimeToINT64; tc->type = JT_LONG; + return; + } + else + if (PyDateTime_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyDateTimeToINT64; tc->type = JT_LONG; + return; + } + else + if (PyDate_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyDateToINT64; tc->type = JT_LONG; + return; + } + else + if (obj == Py_None) + { + PRINTMARK(); + tc->type = JT_NULL; + return; + } ISITERABLE: - if (PyDict_Check(obj)) - { - PRINTMARK(); - tc->type = JT_OBJECT; - pc->iterBegin = Dict_iterBegin; - pc->iterEnd = Dict_iterEnd; - pc->iterNext = Dict_iterNext; - pc->iterGetValue = Dict_iterGetValue; - pc->iterGetName = Dict_iterGetName; - pc->dictObj = obj; - Py_INCREF(obj); - - return; + if (PyDict_Check(obj)) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = obj; + Py_INCREF(obj); + + return; + } + else + if (PyList_Check(obj)) + { + PRINTMARK(); + tc->type = JT_ARRAY; + pc->iterBegin = List_iterBegin; + pc->iterEnd = List_iterEnd; + pc->iterNext = List_iterNext; + pc->iterGetValue = List_iterGetValue; + pc->iterGetName = List_iterGetName; + return; + } + else + if (PyTuple_Check(obj)) + { + PRINTMARK(); + tc->type = JT_ARRAY; + pc->iterBegin = Tuple_iterBegin; + pc->iterEnd = Tuple_iterEnd; + pc->iterNext = Tuple_iterNext; + pc->iterGetValue = Tuple_iterGetValue; + pc->iterGetName = Tuple_iterGetName; + return; + } + else + if (PyAnySet_Check(obj)) + { + PRINTMARK(); + tc->type = JT_ARRAY; + pc->iterBegin = Iter_iterBegin; + pc->iterEnd = Iter_iterEnd; + pc->iterNext = Iter_iterNext; + pc->iterGetValue = Iter_iterGetValue; + pc->iterGetName = Iter_iterGetName; + return; + } + else + if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_index)) + { + if (enc->outputFormat == SPLIT) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Index_iterBegin; + pc->iterEnd = Index_iterEnd; + pc->iterNext = Index_iterNext; + pc->iterGetValue = Index_iterGetValue; + pc->iterGetName = Index_iterGetName; + return; } - else - if (PyList_Check(obj)) - { - PRINTMARK(); - tc->type = JT_ARRAY; - pc->iterBegin = List_iterBegin; - pc->iterEnd = List_iterEnd; - pc->iterNext = List_iterNext; - pc->iterGetValue = List_iterGetValue; - pc->iterGetName = List_iterGetName; - return; + + PRINTMARK(); + tc->type = JT_ARRAY; + pc->newObj = PyObject_GetAttrString(obj, "values"); + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } + else + if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_series)) + { + if (enc->outputFormat == SPLIT) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Series_iterBegin; + pc->iterEnd = Series_iterEnd; + pc->iterNext = Series_iterNext; + pc->iterGetValue = Series_iterGetValue; + pc->iterGetName = Series_iterGetName; + return; + } + + if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->columnLabelsLen = PyArray_SIZE(obj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + goto INVALID; + } } else - if (PyTuple_Check(obj)) { - PRINTMARK(); - tc->type = JT_ARRAY; - pc->iterBegin = Tuple_iterBegin; - pc->iterEnd = Tuple_iterEnd; - pc->iterNext = Tuple_iterNext; - pc->iterGetValue = Tuple_iterGetValue; - pc->iterGetName = Tuple_iterGetName; - return; + PRINTMARK(); + tc->type = JT_ARRAY; + } + pc->newObj = PyObject_GetAttrString(obj, "values"); + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } + else + if (PyArray_Check(obj)) + { + if (enc->npyCtxtPassthru) + { + PRINTMARK(); + pc->npyarr = enc->npyCtxtPassthru; + tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY); + pc->iterBegin = NpyArrPassThru_iterBegin; + pc->iterEnd = NpyArrPassThru_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + enc->npyCtxtPassthru = NULL; + return; } - else - if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_index)) - { - if (enc->outputFormat == SPLIT) - { - PRINTMARK(); - tc->type = JT_OBJECT; - pc->iterBegin = Index_iterBegin; - pc->iterEnd = Index_iterEnd; - pc->iterNext = Index_iterNext; - pc->iterGetValue = Index_iterGetValue; - pc->iterGetName = Index_iterGetName; - return; - } - PRINTMARK(); - tc->type = JT_ARRAY; - pc->newObj = PyObject_GetAttrString(obj, "values"); - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - return; + PRINTMARK(); + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } + else + if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_dataframe)) + { + if (enc->outputFormat == SPLIT) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = DataFrame_iterBegin; + pc->iterEnd = DataFrame_iterEnd; + pc->iterNext = DataFrame_iterNext; + pc->iterGetValue = DataFrame_iterGetValue; + pc->iterGetName = DataFrame_iterGetName; + return; } - else - if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_series)) - { - if (enc->outputFormat == SPLIT) - { - PRINTMARK(); - tc->type = JT_OBJECT; - pc->iterBegin = Series_iterBegin; - pc->iterEnd = Series_iterEnd; - pc->iterNext = Series_iterNext; - pc->iterGetValue = Series_iterGetValue; - pc->iterGetName = Series_iterGetName; - return; - } - if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) - { - PRINTMARK(); - tc->type = JT_OBJECT; - pc->columnLabelsLen = PyArray_SIZE(obj); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); - if (!pc->columnLabels) - { - goto INVALID; - } - } - else - { - PRINTMARK(); - tc->type = JT_ARRAY; - } - pc->newObj = PyObject_GetAttrString(obj, "values"); - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - return; + PRINTMARK(); + pc->newObj = PyObject_GetAttrString(obj, "values"); + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + if (enc->outputFormat == VALUES) + { + PRINTMARK(); + tc->type = JT_ARRAY; } else - if (PyArray_Check(obj)) + if (enc->outputFormat == RECORDS) { - if (enc->npyCtxtPassthru) - { - PRINTMARK(); - pc->npyarr = enc->npyCtxtPassthru; - tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY); - pc->iterBegin = NpyArrPassThru_iterBegin; - pc->iterEnd = NpyArrPassThru_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - enc->npyCtxtPassthru = NULL; - return; - } - - PRINTMARK(); - tc->type = JT_ARRAY; - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - return; + PRINTMARK(); + tc->type = JT_ARRAY; + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 1); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + goto INVALID; + } + } + else + if (enc->outputFormat == INDEX) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->rowLabelsLen = PyArray_DIM(pc->newObj, 0); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->rowLabelsLen); + if (!pc->rowLabels) + { + goto INVALID; + } + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 1); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } } else - if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_dataframe)) { - if (enc->outputFormat == SPLIT) - { - PRINTMARK(); - tc->type = JT_OBJECT; - pc->iterBegin = DataFrame_iterBegin; - pc->iterEnd = DataFrame_iterEnd; - pc->iterNext = DataFrame_iterNext; - pc->iterGetValue = DataFrame_iterGetValue; - pc->iterGetName = DataFrame_iterGetName; - return; - } - - PRINTMARK(); - pc->newObj = PyObject_GetAttrString(obj, "values"); - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - if (enc->outputFormat == VALUES) - { - PRINTMARK(); - tc->type = JT_ARRAY; - } - else - if (enc->outputFormat == RECORDS) - { - PRINTMARK(); - tc->type = JT_ARRAY; - pc->columnLabelsLen = PyArray_DIM(pc->newObj, 1); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); - if (!pc->columnLabels) - { - goto INVALID; - } - } - else - if (enc->outputFormat == INDEX) - { - PRINTMARK(); - tc->type = JT_OBJECT; - pc->rowLabelsLen = PyArray_DIM(pc->newObj, 0); - pc->rowLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->rowLabelsLen); - if (!pc->rowLabels) - { - goto INVALID; - } - pc->columnLabelsLen = PyArray_DIM(pc->newObj, 1); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); - if (!pc->columnLabels) - { - NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); - pc->rowLabels = NULL; - goto INVALID; - } - } - else - { - PRINTMARK(); - tc->type = JT_OBJECT; - pc->rowLabelsLen = PyArray_DIM(pc->newObj, 1); - pc->rowLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->rowLabelsLen); - if (!pc->rowLabels) - { - goto INVALID; - } - pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); - if (!pc->columnLabels) - { - NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); - pc->rowLabels = NULL; - goto INVALID; - } - pc->transpose = 1; - } - return; + PRINTMARK(); + tc->type = JT_OBJECT; + pc->rowLabelsLen = PyArray_DIM(pc->newObj, 1); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->rowLabelsLen); + if (!pc->rowLabels) + { + goto INVALID; + } + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + pc->transpose = 1; } + return; + } + toDictFunc = PyObject_GetAttrString(obj, "toDict"); - toDictFunc = PyObject_GetAttrString(obj, "toDict"); + if (toDictFunc) + { + PyObject* tuple = PyTuple_New(0); + PyObject* toDictResult = PyObject_Call(toDictFunc, tuple, NULL); + Py_DECREF(tuple); + Py_DECREF(toDictFunc); - if (toDictFunc) + if (toDictResult == NULL) { - PyObject* tuple = PyTuple_New(0); - PyObject* toDictResult = PyObject_Call(toDictFunc, tuple, NULL); - Py_DECREF(tuple); - Py_DECREF(toDictFunc); - - if (toDictResult == NULL) - { - PyErr_Clear(); - tc->type = JT_NULL; - return; - } - - if (!PyDict_Check(toDictResult)) - { - Py_DECREF(toDictResult); - tc->type = JT_NULL; - return; - } - - PRINTMARK(); - tc->type = JT_OBJECT; - pc->iterBegin = Dict_iterBegin; - pc->iterEnd = Dict_iterEnd; - pc->iterNext = Dict_iterNext; - pc->iterGetValue = Dict_iterGetValue; - pc->iterGetName = Dict_iterGetName; - pc->dictObj = toDictResult; - return; + PyErr_Clear(); + tc->type = JT_NULL; + return; } - PyErr_Clear(); + if (!PyDict_Check(toDictResult)) + { + Py_DECREF(toDictResult); + tc->type = JT_NULL; + return; + } + PRINTMARK(); tc->type = JT_OBJECT; - pc->iterBegin = Dir_iterBegin; - pc->iterEnd = Dir_iterEnd; - pc->iterNext = Dir_iterNext; - pc->iterGetValue = Dir_iterGetValue; - pc->iterGetName = Dir_iterGetName; - + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = toDictResult; return; + } + + PyErr_Clear(); + + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Dir_iterBegin; + pc->iterEnd = Dir_iterEnd; + pc->iterNext = Dir_iterNext; + pc->iterGetValue = Dir_iterGetValue; + pc->iterGetName = Dir_iterGetName; + return; INVALID: - tc->type = JT_INVALID; - PyObject_Free(tc->prv); - tc->prv = NULL; - return; + tc->type = JT_INVALID; + PyObject_Free(tc->prv); + tc->prv = NULL; + return; } - void Object_endTypeContext(JSOBJ obj, JSONTypeContext *tc) { Py_XDECREF(GET_TC(tc)->newObj); @@ -1462,244 +1583,244 @@ void Object_endTypeContext(JSOBJ obj, JSONTypeContext *tc) const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen) { - return GET_TC(tc)->PyTypeToJSON (obj, tc, NULL, _outLen); + return GET_TC(tc)->PyTypeToJSON (obj, tc, NULL, _outLen); } JSINT64 Object_getLongValue(JSOBJ obj, JSONTypeContext *tc) { - JSINT64 ret; - GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); - - return ret; + JSINT64 ret; + GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); + return ret; } JSINT32 Object_getIntValue(JSOBJ obj, JSONTypeContext *tc) { - JSINT32 ret; - GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); - return ret; + JSINT32 ret; + GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); + return ret; } - double Object_getDoubleValue(JSOBJ obj, JSONTypeContext *tc) { - double ret; - GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); - return ret; + double ret; + GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); + return ret; } static void Object_releaseObject(JSOBJ _obj) { - Py_DECREF( (PyObject *) _obj); + Py_DECREF( (PyObject *) _obj); } - - void Object_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->iterBegin(obj, tc); + GET_TC(tc)->iterBegin(obj, tc); } int Object_iterNext(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->iterNext(obj, tc); + return GET_TC(tc)->iterNext(obj, tc); } void Object_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->iterEnd(obj, tc); + GET_TC(tc)->iterEnd(obj, tc); } JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->iterGetValue(obj, tc); + return GET_TC(tc)->iterGetValue(obj, tc); } char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { - return GET_TC(tc)->iterGetName(obj, tc, outLen); + return GET_TC(tc)->iterGetName(obj, tc, outLen); } - PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = { "obj", "ensure_ascii", "double_precision", "orient", NULL}; + static char *kwlist[] = { "obj", "ensure_ascii", "double_precision", "encode_html_chars", "orient", NULL}; + + char buffer[65536]; + char *ret; + PyObject *newobj; + PyObject *oinput = NULL; + PyObject *oensureAscii = NULL; + int idoublePrecision = 10; // default double precision setting + PyObject *oencodeHTMLChars = NULL; + char *sOrient = NULL; + + PyObjectEncoder pyEncoder = + { + { + Object_beginTypeContext, + Object_endTypeContext, + Object_getStringValue, + Object_getLongValue, + Object_getIntValue, + Object_getDoubleValue, + Object_iterBegin, + Object_iterNext, + Object_iterEnd, + Object_iterGetValue, + Object_iterGetName, + Object_releaseObject, + PyObject_Malloc, + PyObject_Realloc, + PyObject_Free, + -1, //recursionMax + idoublePrecision, + 1, //forceAscii + 0, //encodeHTMLChars + } + }; + JSONObjectEncoder* encoder = (JSONObjectEncoder*) &pyEncoder; + + pyEncoder.npyCtxtPassthru = NULL; + pyEncoder.outputFormat = COLUMNS; + + PRINTMARK(); + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOs", kwlist, &oinput, &oensureAscii, &idoublePrecision, &oencodeHTMLChars, &sOrient)) + { + return NULL; + } - char buffer[65536]; - char *ret; - PyObject *newobj; - PyObject *oinput = NULL; - PyObject *oensureAscii = NULL; - char *sOrient = NULL; - int idoublePrecision = 5; // default double precision setting + if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) + { + encoder->forceASCII = 0; + } - PyObjectEncoder pyEncoder = - { - { - Object_beginTypeContext, //void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc); - Object_endTypeContext, //void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc); - Object_getStringValue, //const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen); - Object_getLongValue, //JSLONG (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); - Object_getIntValue, //JSLONG (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); - Object_getDoubleValue, //double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc); - Object_iterBegin, //JSPFN_ITERBEGIN iterBegin; - Object_iterNext, //JSPFN_ITERNEXT iterNext; - Object_iterEnd, //JSPFN_ITEREND iterEnd; - Object_iterGetValue, //JSPFN_ITERGETVALUE iterGetValue; - Object_iterGetName, //JSPFN_ITERGETNAME iterGetName; - Object_releaseObject, //void (*releaseValue)(JSONTypeContext *ti); - PyObject_Malloc, //JSPFN_MALLOC malloc; - PyObject_Realloc, //JSPFN_REALLOC realloc; - PyObject_Free, //JSPFN_FREE free; - -1, //recursionMax - idoublePrecision, - 1, //forceAscii - } - }; - JSONObjectEncoder* encoder = (JSONObjectEncoder*) &pyEncoder; + if (oencodeHTMLChars != NULL && PyObject_IsTrue(oencodeHTMLChars)) + { + encoder->encodeHTMLChars = 1; + } - pyEncoder.npyCtxtPassthru = NULL; - pyEncoder.outputFormat = COLUMNS; + encoder->doublePrecision = idoublePrecision; - PRINTMARK(); - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Ois", kwlist, &oinput, &oensureAscii, &idoublePrecision, &sOrient)) + if (sOrient != NULL) + { + if (strcmp(sOrient, "records") == 0) { - return NULL; + pyEncoder.outputFormat = RECORDS; } - - if (sOrient != NULL) + else + if (strcmp(sOrient, "index") == 0) { - if (strcmp(sOrient, "records") == 0) - { - pyEncoder.outputFormat = RECORDS; - } - else - if (strcmp(sOrient, "index") == 0) - { - pyEncoder.outputFormat = INDEX; - } - else - if (strcmp(sOrient, "split") == 0) - { - pyEncoder.outputFormat = SPLIT; - } - else - if (strcmp(sOrient, "values") == 0) - { - pyEncoder.outputFormat = VALUES; - } - else - if (strcmp(sOrient, "columns") != 0) - { - PyErr_Format (PyExc_ValueError, "Invalid value '%s' for option 'orient'", sOrient); - return NULL; - } + pyEncoder.outputFormat = INDEX; } - - pyEncoder.originalOutputFormat = pyEncoder.outputFormat; - - if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) + else + if (strcmp(sOrient, "split") == 0) { - encoder->forceASCII = 0; + pyEncoder.outputFormat = SPLIT; } - - encoder->doublePrecision = idoublePrecision; - - PRINTMARK(); - ret = JSON_EncodeObject (oinput, encoder, buffer, sizeof (buffer)); - PRINTMARK(); - - if (PyErr_Occurred()) + else + if (strcmp(sOrient, "values") == 0) { - return NULL; + pyEncoder.outputFormat = VALUES; } - - if (encoder->errorMsg) + else + if (strcmp(sOrient, "columns") != 0) { - if (ret != buffer) - { - encoder->free (ret); - } - - PyErr_Format (PyExc_OverflowError, "%s", encoder->errorMsg); - return NULL; + PyErr_Format (PyExc_ValueError, "Invalid value '%s' for option 'orient'", sOrient); + return NULL; } + } + + pyEncoder.originalOutputFormat = pyEncoder.outputFormat; + PRINTMARK(); + ret = JSON_EncodeObject (oinput, encoder, buffer, sizeof (buffer)); + PRINTMARK(); - newobj = PyString_FromString (ret); + if (PyErr_Occurred()) + { + return NULL; + } + if (encoder->errorMsg) + { if (ret != buffer) { - encoder->free (ret); + encoder->free (ret); } - PRINTMARK(); + PyErr_Format (PyExc_OverflowError, "%s", encoder->errorMsg); + return NULL; + } + + newobj = PyString_FromString (ret); + + if (ret != buffer) + { + encoder->free (ret); + } - return newobj; + PRINTMARK(); + + return newobj; } PyObject* objToJSONFile(PyObject* self, PyObject *args, PyObject *kwargs) { - PyObject *data; - PyObject *file; - PyObject *string; - PyObject *write; - PyObject *argtuple; - - PRINTMARK(); + PyObject *data; + PyObject *file; + PyObject *string; + PyObject *write; + PyObject *argtuple; - if (!PyArg_ParseTuple (args, "OO", &data, &file)) { - return NULL; - } + PRINTMARK(); - if (!PyObject_HasAttrString (file, "write")) - { - PyErr_Format (PyExc_TypeError, "expected file"); - return NULL; - } + if (!PyArg_ParseTuple (args, "OO", &data, &file)) + { + return NULL; + } - write = PyObject_GetAttrString (file, "write"); + if (!PyObject_HasAttrString (file, "write")) + { + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } - if (!PyCallable_Check (write)) { - Py_XDECREF(write); - PyErr_Format (PyExc_TypeError, "expected file"); - return NULL; - } + write = PyObject_GetAttrString (file, "write"); - argtuple = PyTuple_Pack(1, data); + if (!PyCallable_Check (write)) + { + Py_XDECREF(write); + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } - string = objToJSON (self, argtuple, kwargs); + argtuple = PyTuple_Pack(1, data); - if (string == NULL) - { - Py_XDECREF(write); - Py_XDECREF(argtuple); - return NULL; - } + string = objToJSON (self, argtuple, kwargs); + if (string == NULL) + { + Py_XDECREF(write); Py_XDECREF(argtuple); + return NULL; + } - argtuple = PyTuple_Pack (1, string); - if (argtuple == NULL) - { - Py_XDECREF(write); - return NULL; - } - if (PyObject_CallObject (write, argtuple) == NULL) - { - Py_XDECREF(write); - Py_XDECREF(argtuple); - return NULL; - } + Py_XDECREF(argtuple); + argtuple = PyTuple_Pack (1, string); + if (argtuple == NULL) + { Py_XDECREF(write); - Py_DECREF(argtuple); - Py_XDECREF(string); - - PRINTMARK(); + return NULL; + } + if (PyObject_CallObject (write, argtuple) == NULL) + { + Py_XDECREF(write); + Py_XDECREF(argtuple); + return NULL; + } - Py_RETURN_NONE; + Py_XDECREF(write); + Py_DECREF(argtuple); + Py_XDECREF(string); + PRINTMARK(); + Py_RETURN_NONE; } - diff --git a/pandas/src/ujson/python/py_defines.h b/pandas/src/ujson/python/py_defines.h index 1544c2e3cf34d..312914217d8e3 100644 --- a/pandas/src/ujson/python/py_defines.h +++ b/pandas/src/ujson/python/py_defines.h @@ -1,3 +1,40 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + #include #if PY_MAJOR_VERSION >= 3 diff --git a/pandas/src/ujson/python/ujson.c b/pandas/src/ujson/python/ujson.c index e04309e620a1d..33b01b341c20a 100644 --- a/pandas/src/ujson/python/ujson.c +++ b/pandas/src/ujson/python/ujson.c @@ -1,3 +1,40 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. +*/ + #include "py_defines.h" #include "version.h" @@ -15,28 +52,30 @@ PyObject* objToJSONFile(PyObject* self, PyObject *args, PyObject *kwargs); PyObject* JSONFileToObj(PyObject* self, PyObject *args, PyObject *kwargs); +#define ENCODER_HELP_TEXT "Use ensure_ascii=false to output UTF-8. Pass in double_precision to alter the maximum digit precision of doubles. Set encode_html_chars=True to encode < > & as unicode escape sequences." + static PyMethodDef ujsonMethods[] = { - {"encode", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. Use ensure_ascii=false to output UTF-8. Pass in double_precision to alter the maximum digit precision with doubles"}, - {"decode", (PyCFunction) JSONToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as string to dict object structure"}, - {"dumps", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. Use ensure_ascii=false to output UTF-8"}, - {"loads", (PyCFunction) JSONToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as string to dict object structure"}, - {"dump", (PyCFunction) objToJSONFile, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON file. Use ensure_ascii=false to output UTF-8"}, - {"load", (PyCFunction) JSONFileToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as file to dict object structure"}, - {NULL, NULL, 0, NULL} /* Sentinel */ + {"encode", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. " ENCODER_HELP_TEXT}, + {"decode", (PyCFunction) JSONToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as string to dict object structure. Use precise_float=True to use high precision float decoder."}, + {"dumps", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. " ENCODER_HELP_TEXT}, + {"loads", (PyCFunction) JSONToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as string to dict object structure. Use precise_float=True to use high precision float decoder."}, + {"dump", (PyCFunction) objToJSONFile, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON file. " ENCODER_HELP_TEXT}, + {"load", (PyCFunction) JSONFileToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as file to dict object structure. Use precise_float=True to use high precision float decoder."}, + {NULL, NULL, 0, NULL} /* Sentinel */ }; #if PY_MAJOR_VERSION >= 3 static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - "_pandasujson", - 0, /* m_doc */ - -1, /* m_size */ - ujsonMethods, /* m_methods */ - NULL, /* m_reload */ - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL /* m_free */ + PyModuleDef_HEAD_INIT, + "_pandasujson", + 0, /* m_doc */ + -1, /* m_size */ + ujsonMethods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ }; #define PYMODINITFUNC PyObject *PyInit_json(void) @@ -53,21 +92,21 @@ static struct PyModuleDef moduledef = { PYMODINITFUNC { - PyObject *module; - PyObject *version_string; + PyObject *module; + PyObject *version_string; - initObjToJSON(); - module = PYMODULE_CREATE(); + initObjToJSON(); + module = PYMODULE_CREATE(); - if (module == NULL) - { - MODINITERROR; - } + if (module == NULL) + { + MODINITERROR; + } - version_string = PyString_FromString (UJSON_VERSION); - PyModule_AddObject (module, "__version__", version_string); + version_string = PyString_FromString (UJSON_VERSION); + PyModule_AddObject (module, "__version__", version_string); #if PY_MAJOR_VERSION >= 3 - return module; + return module; #endif } diff --git a/pandas/src/ujson/python/version.h b/pandas/src/ujson/python/version.h index 9449441411192..0ccfbfe74521c 100644 --- a/pandas/src/ujson/python/version.h +++ b/pandas/src/ujson/python/version.h @@ -1 +1,38 @@ -#define UJSON_VERSION "1.18" +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#define UJSON_VERSION "1.33" From 1ecac4756b102b1881b145b1e673d1bd7b87bc62 Mon Sep 17 00:00:00 2001 From: Kieran O'Mahony Date: Tue, 18 Jun 2013 17:28:56 +0100 Subject: [PATCH 2/2] BUG: ujson fix for VC++ --- pandas/src/ujson/python/objToJSON.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c index 040f86793d206..89d3c203fbb7d 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/src/ujson/python/objToJSON.c @@ -144,13 +144,13 @@ void initObjToJSON(void) int initObjToJSON(void) #endif { + PyObject *mod_frame; PyObject* mod_decimal = PyImport_ImportModule("decimal"); type_decimal = PyObject_GetAttrString(mod_decimal, "Decimal"); Py_INCREF(type_decimal); Py_DECREF(mod_decimal); PyDateTime_IMPORT; - PyObject *mod_frame; mod_frame = PyImport_ImportModule("pandas.core.frame"); if (mod_frame)