From edefe981ca2422e78045b981bb373cfbeb458097 Mon Sep 17 00:00:00 2001 From: Kieran O'Mahony Date: Wed, 25 Sep 2013 11:24:04 +1000 Subject: [PATCH] FIX: JSON support non C locales --- doc/source/release.rst | 2 ++ pandas/io/tests/test_json/test_ujson.py | 13 +++++++++++++ pandas/src/ujson/lib/ultrajsondec.c | 12 +++++++++++- pandas/src/ujson/lib/ultrajsonenc.c | 11 +++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 97150cbeb53a2..8584fe564f8b0 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -358,6 +358,8 @@ Bug Fixes dtypes, surfaced in (:issue:`4377`) - Fixed bug with duplicate columns and type conversion in ``read_json`` when ``orient='split'`` (:issue:`4377`) + - Fixed JSON bug where locales with decimal separators other than '.' threw + exceptions when encoding / decoding certain values. (:issue:`4918`) - Fix ``.iat`` indexing with a ``PeriodIndex`` (:issue:`4390`) - Fixed an issue where ``PeriodIndex`` joining with self was returning a new instance rather than the same instance (:issue:`4379`); also adds a test diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 4d6218d3dbc35..38a30b8baf459 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -83,6 +83,19 @@ def test_doubleLongDecimalIssue(self): decoded = ujson.decode(encoded) self.assertEqual(sut, decoded) + def test_encodeNonCLocale(self): + import locale + savedlocale = locale.getlocale(locale.LC_NUMERIC) + try: + locale.setlocale(locale.LC_NUMERIC, 'it_IT.UTF-8') + except: + try: + locale.setlocale(locale.LC_NUMERIC, 'Italian_Italy') + except: + raise nose.SkipTest('Could not set locale for testing') + self.assertEqual(ujson.loads(ujson.dumps(4.78e60)), 4.78e60) + self.assertEqual(ujson.loads('4.78', precise_float=True), 4.78) + locale.setlocale(locale.LC_NUMERIC, savedlocale) def 
test_encodeDecodeLongDecimal(self): sut = {u('a'): -528656961.4399388} diff --git a/pandas/src/ujson/lib/ultrajsondec.c b/pandas/src/ujson/lib/ultrajsondec.c index c5cf341ad3092..85a8387547641 100644 --- a/pandas/src/ujson/lib/ultrajsondec.c +++ b/pandas/src/ujson/lib/ultrajsondec.c @@ -43,6 +43,7 @@ Numeric decoder derived from from TCL library #include #include #include +#include <locale.h> #ifndef TRUE #define TRUE 1 @@ -824,7 +825,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_object( struct DecoderState *ds) default: ds->dec->releaseObject(ds->prv, newObj, ds->dec); - return SetError(ds, -1, "Unexpected character in found when decoding object value"); + return SetError(ds, -1, "Unexpected character found when decoding object value"); } } } @@ -874,6 +875,7 @@ JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuf { /* FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode escaping doesn't run into the wall each time */ + char *locale; struct DecoderState ds; wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))]; JSOBJ ret; @@ -892,7 +894,15 @@ JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuf ds.dec = dec; + locale = strdup(setlocale(LC_NUMERIC, NULL)); + if (!locale) + { + return SetError(&ds, -1, "Could not reserve memory block"); + } + setlocale(LC_NUMERIC, "C"); ret = decode_any (&ds); + setlocale(LC_NUMERIC, locale); + free(locale); if (ds.escHeap) { diff --git a/pandas/src/ujson/lib/ultrajsonenc.c b/pandas/src/ujson/lib/ultrajsonenc.c index 15d92d42f6753..17048bd86adc2 100644 --- a/pandas/src/ujson/lib/ultrajsonenc.c +++ b/pandas/src/ujson/lib/ultrajsonenc.c @@ -41,6 +41,7 @@ Numeric decoder derived from from TCL library #include #include #include +#include <locale.h> #include @@ -877,6 +878,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName) char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer) { + char *locale; 
enc->malloc = enc->malloc ? enc->malloc : malloc; enc->free = enc->free ? enc->free : free; enc->realloc = enc->realloc ? enc->realloc : realloc; @@ -915,7 +917,16 @@ char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t enc->end = enc->start + _cbBuffer; enc->offset = enc->start; + locale = strdup(setlocale(LC_NUMERIC, NULL)); + if (!locale) + { + SetError(NULL, enc, "Could not reserve memory block"); + return NULL; + } + setlocale(LC_NUMERIC, "C"); encode (obj, enc, NULL, 0); + setlocale(LC_NUMERIC, locale); + free(locale); Buffer_Reserve(enc, 1); if (enc->errorMsg)