diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 5b72afe53e30e..3180043b57244 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -113,6 +113,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Bug in ``io.json.json_normalize()``, where non-ASCII keys raised an exception (:issue:`13213`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) - Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) diff --git a/pandas/io/json.py b/pandas/io/json.py index 08bfd8d7796a0..fd97e51208f7e 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -614,10 +614,12 @@ def nested_to_record(ds, prefix="", level=0): new_d = copy.deepcopy(d) for k, v in d.items(): # each key gets renamed with prefix + if not isinstance(k, compat.string_types): + k = str(k) if level == 0: - newkey = str(k) + newkey = k else: - newkey = prefix + '.' 
+ k # only dicts gets recurse-flattend # only at level>1 do we rename the rest of the keys diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_json_norm.py index 81a1fecbdebac..28239d15cd3f1 100644 --- a/pandas/io/tests/json/test_json_norm.py +++ b/pandas/io/tests/json/test_json_norm.py @@ -2,8 +2,10 @@ from pandas import DataFrame import numpy as np +import json import pandas.util.testing as tm +import pandas.compat from pandas.io.json import json_normalize, nested_to_record @@ -164,6 +166,26 @@ def test_record_prefix(self): tm.assert_frame_equal(result, expected) + def test_non_ascii_key(self): + if pandas.compat.PY3: + testjson = ( + b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' + + b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]' + ).decode('utf8') + else: + testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' + '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]') + + testdata = { + u'sub.A': [1, 3], + u'sub.B': [2, 4], + b"\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1] + } + testdf = DataFrame(testdata) + + df = json_normalize(json.loads(testjson)) + tm.assert_frame_equal(df, testdf) + class TestNestedToRecord(tm.TestCase):