From b9751e95ab539ffe4d91df03001cca8b34ab15c8 Mon Sep 17 00:00:00 2001 From: Felix Marczinowski Date: Wed, 18 May 2016 14:46:42 +0200 Subject: [PATCH 1/7] add test for #13213 --- pandas/tests/frame/test_json.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 pandas/tests/frame/test_json.py diff --git a/pandas/tests/frame/test_json.py b/pandas/tests/frame/test_json.py new file mode 100644 index 0000000000000..d73da6a427cf2 --- /dev/null +++ b/pandas/tests/frame/test_json.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +"""""" + +from __future__ import (absolute_import, division, print_function) + +import pandas.util.testing as tm +import pandas as pd +import json + + +class TestJSON(tm.TestCase): + testjson = u''' + [{"Ünicøde":0,"sub":{"A":1, "B":2}}, + {"Ünicøde":1,"sub":{"A":3, "B":4}}] + '''.encode('utf8') + + testdata = { + u'sub.A': [1, 3], + u'sub.B': [2, 4], + u'Ünicøde': [0, 1] + } + testdf = pd.DataFrame(testdata) + + def test_json_normalize(self): + df = pd.io.json.json_normalize(json.loads(self.testjson)) + tm.assert_frame_equal(df, self.testdf) From 4dcd2c5d013318276b850248f36603a65199bc5c Mon Sep 17 00:00:00 2001 From: Felix Marczinowski Date: Wed, 18 May 2016 14:46:59 +0200 Subject: [PATCH 2/7] fix for #13213 --- pandas/io/json.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/io/json.py b/pandas/io/json.py index 08bfd8d7796a0..12de3a21c732d 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -614,10 +614,12 @@ def nested_to_record(ds, prefix="", level=0): new_d = copy.deepcopy(d) for k, v in d.items(): # each key gets renamed with prefix + if not isinstance(k, basestring): + k = str(k) if level == 0: - newkey = str(k) + newkey = k else: - newkey = prefix + '.' + str(k) + newkey = prefix + '.' + k # only dicts gets recurse-flattend # only at level>1 do we rename the rest of the keys From dd7302c176fac2691ee1a826eeaa5b0d89262485 Mon Sep 17 00:00:00 2001 From: Felix Marczinowski Date: Wed, 18 May 2016 15:09:15 +0200 Subject: [PATCH 3/7] remove encoding signature from test --- pandas/tests/frame/test_json.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/frame/test_json.py b/pandas/tests/frame/test_json.py index d73da6a427cf2..2bbf45ae8ba8d 100644 --- a/pandas/tests/frame/test_json.py +++ b/pandas/tests/frame/test_json.py @@ -1,6 +1,3 @@ -# -*- coding: utf-8 -*- -"""""" - from __future__ import (absolute_import, division, print_function) import pandas.util.testing as tm From 7a38110ec7f5479c5cc352fb89bac075532bee8f Mon Sep 17 00:00:00 2001 From: Felix Marczinowski Date: Wed, 18 May 2016 15:12:14 +0200 Subject: [PATCH 4/7] add whatsnew note --- doc/source/whatsnew/v0.18.2.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 5b72afe53e30e..3180043b57244 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -113,6 +113,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) - Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) From 25fd0f8325056dea9b9f2306614811786c921835 Mon Sep 17 00:00:00 2001 From: Felix Marczinowski Date: Wed, 18 May 2016 15:23:01 +0200 Subject: [PATCH 5/7] move test, fix py3 issue --- pandas/io/json.py | 2 +- pandas/io/tests/json/test_json_norm.py | 15 +++++++++++++++ pandas/tests/frame/test_json.py | 23 ----------------------- 3 files changed, 16 insertions(+), 24 deletions(-) delete mode 100644 pandas/tests/frame/test_json.py diff --git a/pandas/io/json.py b/pandas/io/json.py index 12de3a21c732d..fd97e51208f7e 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -614,7 +614,7 @@ def nested_to_record(ds, prefix="", level=0): new_d = copy.deepcopy(d) for k, v in d.items(): # each key gets renamed with prefix - if not isinstance(k, basestring): + if not isinstance(k, compat.string_types): k = str(k) if level == 0: newkey = k diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_json_norm.py index 81a1fecbdebac..c47099cc61281 100644 --- a/pandas/io/tests/json/test_json_norm.py +++ b/pandas/io/tests/json/test_json_norm.py @@ -2,6 +2,7 @@ from pandas import DataFrame import numpy as np +import json import pandas.util.testing as tm @@ -164,6 +165,20 @@ def test_record_prefix(self): tm.assert_frame_equal(result, expected) + def test_non_ascii_key(self): + testjson = '[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' \ + '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]' + + testdata = { + u'sub.A': [1, 3], + u'sub.B': [2, 4], + "\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1] + } + testdf = DataFrame(testdata) + + df = json_normalize(json.loads(testjson)) + tm.assert_frame_equal(df, testdf) + class TestNestedToRecord(tm.TestCase): diff --git a/pandas/tests/frame/test_json.py b/pandas/tests/frame/test_json.py deleted file mode 100644 index 2bbf45ae8ba8d..0000000000000 --- a/pandas/tests/frame/test_json.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import (absolute_import, division, print_function) - -import pandas.util.testing as tm -import pandas as pd -import json - - -class TestJSON(tm.TestCase): - testjson = u''' - [{"Ünicøde":0,"sub":{"A":1, "B":2}}, - {"Ünicøde":1,"sub":{"A":3, "B":4}}] - '''.encode('utf8') - - testdata = { - u'sub.A': [1, 3], - u'sub.B': [2, 4], - u'Ünicøde': [0, 1] - } - testdf = pd.DataFrame(testdata) - - def test_json_normalize(self): - df = pd.io.json.json_normalize(json.loads(self.testjson)) - tm.assert_frame_equal(df, self.testdf) From 44745ca323ee73924c6413eb4bec3dbe660a2155 Mon Sep 17 00:00:00 2001 From: Felix Marczinowski Date: Thu, 19 May 2016 09:27:58 +0200 Subject: [PATCH 6/7] fix tests for py3 --- pandas/io/tests/json/test_json_norm.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_json_norm.py index c47099cc61281..f82eb8911e7aa 100644 --- a/pandas/io/tests/json/test_json_norm.py +++ b/pandas/io/tests/json/test_json_norm.py @@ -5,6 +5,7 @@ import json import pandas.util.testing as tm +import pandas.compat from pandas.io.json import json_normalize, nested_to_record @@ -166,13 +167,17 @@ def test_record_prefix(self): tm.assert_frame_equal(result, expected) def test_non_ascii_key(self): - testjson = '[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' \ - '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]' + if pandas.compat.PY3: + testjson = (b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' + \ + b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]').decode('utf8') + else: + testjson = '[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' \ + '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]' testdata = { u'sub.A': [1, 3], u'sub.B': [2, 4], - "\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1] + b"\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1] } testdf = DataFrame(testdata) From 22e01b26152687a0830015ddef817f3fb9585041 Mon Sep 17 00:00:00 2001 From: Felix Marczinowski Date: Thu, 19 May 2016 09:52:35 +0200 Subject: [PATCH 7/7] fix linter warnings --- pandas/io/tests/json/test_json_norm.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_json_norm.py index f82eb8911e7aa..28239d15cd3f1 100644 --- a/pandas/io/tests/json/test_json_norm.py +++ b/pandas/io/tests/json/test_json_norm.py @@ -168,11 +168,13 @@ def test_record_prefix(self): def test_non_ascii_key(self): if pandas.compat.PY3: - testjson = (b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' + \ - b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]').decode('utf8') + testjson = ( + b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' + + b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]' + ).decode('utf8') else: - testjson = '[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' \ - '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]' + testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' + '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]') testdata = { u'sub.A': [1, 3],