Skip to content

Commit eeccd05

Browse files
Felix Marczinowski authored and jreback committed
BUG: Fix #13213 json_normalize() and non-ascii characters in keys
closes #13213

Author: Felix Marczinowski <[email protected]>

Closes #13214 from fmarczin/13213-unicode-json_normalize and squashes the following commits:

22e01b2 [Felix Marczinowski] fix linter warnings
44745ca [Felix Marczinowski] fix tests for py3
25fd0f8 [Felix Marczinowski] move test, fix py3 issue
7a38110 [Felix Marczinowski] add whatsnew note
dd7302c [Felix Marczinowski] remove encoding signature from test
4dcd2c5 [Felix Marczinowski] fix for #13213
b9751e9 [Felix Marczinowski] add test for #13213
1 parent 4b50149 commit eeccd05

File tree

3 files changed

+27
-2
lines changed

3 files changed

+27
-2
lines changed

doc/source/whatsnew/v0.18.2.txt

+1
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,7 @@ Performance Improvements
113113
Bug Fixes
114114
~~~~~~~~~
115115

116+
- Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`)
116117
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`)
117118
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`)
118119
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)

pandas/io/json.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -614,10 +614,12 @@ def nested_to_record(ds, prefix="", level=0):
614614
new_d = copy.deepcopy(d)
615615
for k, v in d.items():
616616
# each key gets renamed with prefix
617+
if not isinstance(k, compat.string_types):
618+
k = str(k)
617619
if level == 0:
618-
newkey = str(k)
620+
newkey = k
619621
else:
620-
newkey = prefix + '.' + str(k)
622+
newkey = prefix + '.' + k
621623

622624
# only dicts gets recurse-flattend
623625
# only at level>1 do we rename the rest of the keys

pandas/io/tests/json/test_json_norm.py

+22
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,10 @@
22

33
from pandas import DataFrame
44
import numpy as np
5+
import json
56

67
import pandas.util.testing as tm
8+
from pandas import compat
79

810
from pandas.io.json import json_normalize, nested_to_record
911

@@ -164,6 +166,26 @@ def test_record_prefix(self):
164166

165167
tm.assert_frame_equal(result, expected)
166168

169+
def test_non_ascii_key(self):
170+
if compat.PY3:
171+
testjson = (
172+
b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' +
173+
b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
174+
).decode('utf8')
175+
else:
176+
testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
177+
'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')
178+
179+
testdata = {
180+
u'sub.A': [1, 3],
181+
u'sub.B': [2, 4],
182+
b"\xc3\x9cnic\xc3\xb8de".decode('utf8'): [0, 1]
183+
}
184+
expected = DataFrame(testdata)
185+
186+
result = json_normalize(json.loads(testjson))
187+
tm.assert_frame_equal(result, expected)
188+
167189

168190
class TestNestedToRecord(tm.TestCase):
169191

0 commit comments

Comments
 (0)