Skip to content

Commit dfa6579

Browse files
committed
BUG: Fix json_normalize throwing TypeError when record_path has a sequence of dicts along its path pandas-dev#22706
1 parent bdb7a16 commit dfa6579

File tree

3 files changed

+45
-24
lines changed

3 files changed

+45
-24
lines changed

doc/source/whatsnew/v0.23.5.txt

+2
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,5 @@ Bug Fixes
5252
**I/O**
5353

5454
- Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with an integer index column (:issue:`17128`)
55+
- Bug in :func:`json_normalize` that caused it to raise ``TypeError`` when ``record_path`` has a sequence of dicts along its path (:issue:`22706`)
56+
-

pandas/io/json/normalize.py

+28-24
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,30 @@ def _pull_field(js, spec):
224224
sep = str(sep)
225225
meta_keys = [sep.join(val) for val in meta]
226226

227+
def _extract(obj, path, seen_meta, level):
    """
    Pull the records found under ``path[0]`` of one object and gather
    the metadata values that accompany them.

    Nothing is returned; the results are accumulated in ``records``,
    ``lengths`` and ``meta_vals``, which (together with ``meta``,
    ``meta_keys`` and ``errors``) are taken from the enclosing scope.

    Parameters
    ----------
    obj : object
        Item handed to ``_pull_field``; presumably a dict from the
        parsed JSON input -- confirm against the caller.
    path : sequence
        Remaining record path. Only ``path[0]`` is consulted here.
    seen_meta : mapping
        Values looked up by meta key whenever a metadata path has no
        more than ``level`` components. Expected to have been filled in
        by the caller (not visible in this block).
    level : int
        Current depth; metadata paths are sliced with ``val[level:]``.

    Raises
    ------
    KeyError
        If a metadata field cannot be pulled from ``obj`` and
        ``errors`` is not ``'ignore'``.
    """
    extracted = _pull_field(obj, path[0])

    # Remember how many records this object contributed so that the
    # metadata can be repeated to the matching length later.
    lengths.append(len(extracted))

    for meta_path, meta_key in zip(meta, meta_keys):
        if level + 1 > len(meta_path):
            # The metadata path is no deeper than the current level, so
            # use the value carried in ``seen_meta``.
            meta_vals[meta_key].append(seen_meta[meta_key])
            continue

        try:
            value = _pull_field(obj, meta_path[level:])
        except KeyError as e:
            if errors != 'ignore':
                raise KeyError("Try running with "
                               "errors='ignore' as key "
                               "{err} is not always present"
                               .format(err=e))
            # Missing metadata is tolerated: record NaN instead.
            value = np.nan
        meta_vals[meta_key].append(value)

    records.extend(extracted)
250+
227251
def _recursive_extract(data, path, seen_meta, level=0):
228252
if len(path) > 1:
229253
for obj in data:
@@ -233,31 +257,11 @@ def _recursive_extract(data, path, seen_meta, level=0):
233257

234258
_recursive_extract(obj[path[0]], path[1:],
235259
seen_meta, level=level + 1)
236-
else:
260+
elif isinstance(data, list):
237261
for obj in data:
238-
recs = _pull_field(obj, path[0])
239-
240-
# For repeating the metadata later
241-
lengths.append(len(recs))
242-
243-
for val, key in zip(meta, meta_keys):
244-
if level + 1 > len(val):
245-
meta_val = seen_meta[key]
246-
else:
247-
try:
248-
meta_val = _pull_field(obj, val[level:])
249-
except KeyError as e:
250-
if errors == 'ignore':
251-
meta_val = np.nan
252-
else:
253-
raise \
254-
KeyError("Try running with "
255-
"errors='ignore' as key "
256-
"{err} is not always present"
257-
.format(err=e))
258-
meta_vals[key].append(meta_val)
259-
260-
records.extend(recs)
262+
_extract(obj, path, seen_meta, level)
263+
else:
264+
_extract(data, path, seen_meta, level)
261265

262266
_recursive_extract(data, record_path, {}, level=0)
263267

pandas/tests/io/json/test_normalize.py

+15
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,21 @@ def test_value_array_record_prefix(self):
129129
expected = DataFrame([[1], [2]], columns=['Prefix.0'])
130130
tm.assert_frame_equal(result, expected)
131131

132+
def test_nested_object_record_path(self):
    # GH 22706: ``record_path`` passes through a nested dict ('info')
    # before reaching the list of records ('counties').
    counties = [{'name': 'Dade', 'population': 12345},
                {'name': 'Broward', 'population': 40000},
                {'name': 'Palm Beach', 'population': 60000}]
    info = {'governor': 'Rick Scott', 'counties': counties}
    data = {'state': 'Florida', 'info': info}

    result = json_normalize(data, record_path=["info", "counties"])

    expected_rows = [['Dade', 12345],
                     ['Broward', 40000],
                     ['Palm Beach', 60000]]
    expected = DataFrame(expected_rows, columns=['name', 'population'])
    tm.assert_frame_equal(result, expected)
146+
132147
def test_more_deeply_nested(self, deep_nested):
133148

134149
result = json_normalize(deep_nested, ['states', 'cities'],

0 commit comments

Comments
 (0)