Skip to content

Commit bcfbf18

Browse files
author
dickreuter
committed
Avoids an exception in pandas.io.json.json_normalize when the meta parameter lists keys that are not present in every item of the list
Added documentation and test for issue pandas-dev#14505. Added keyword errors {'raise'|'ignore'}. Shortened what's new. Removed commas in dictionary for linting compatibility. Updated doc.
1 parent 14e4815 commit bcfbf18

File tree

2 files changed

+66
-2
lines changed

2 files changed

+66
-2
lines changed

pandas/io/json.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -723,7 +723,9 @@ def nested_to_record(ds, prefix="", level=0):
723723

724724
def json_normalize(data, record_path=None, meta=None,
725725
meta_prefix=None,
726-
record_prefix=None):
726+
record_prefix=None,
727+
errors='raise'):
728+
727729
"""
728730
"Normalize" semi-structured JSON data into a flat table
729731
@@ -740,6 +742,8 @@ def json_normalize(data, record_path=None, meta=None,
740742
If True, prefix records with dotted (?) path, e.g. foo.bar.field if
741743
path to records is ['foo', 'bar']
742744
meta_prefix : string, default None
745+
errors : {'raise', 'ignore'}, default 'raise'
746+
* ignore: will ignore KeyError if keys listed in meta are not always present
743747
744748
Returns
745749
-------
@@ -839,7 +843,14 @@ def _recursive_extract(data, path, seen_meta, level=0):
839843
if level + 1 > len(val):
840844
meta_val = seen_meta[key]
841845
else:
842-
meta_val = _pull_field(obj, val[level:])
846+
try:
847+
meta_val = _pull_field(obj, val[level:])
848+
except KeyError as e:
849+
if errors == 'ignore':
850+
meta_val = np.nan
851+
else:
852+
raise KeyError(
853+
"Try running with errors='ignore' as the following key may not always be present: " + str(e))
843854
meta_vals[key].append(meta_val)
844855

845856
records.extend(recs)

pandas/io/tests/json/test_json_norm.py

+53
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,59 @@ def test_nested_flattens(self):
225225

226226
self.assertEqual(result, expected)
227227

228+
229+
def test_json_normalise_fix(self):
230+
# issue 14505
231+
j = {
232+
"Trades": [{
233+
"general": {
234+
"tradeid": 100,
235+
"trade_version": 1,
236+
"stocks": [{
237+
238+
"symbol": "AAPL",
239+
"name": "Apple",
240+
"price": "0"
241+
242+
}, {
243+
244+
"symbol": "GOOG",
245+
"name": "Google",
246+
"price": "0"
247+
248+
}
249+
]
250+
}
251+
}, {
252+
"general": {
253+
"tradeid": 100,
254+
"stocks": [{
255+
256+
"symbol": "AAPL",
257+
"name": "Apple",
258+
"price": "0"
259+
260+
}, {
261+
"symbol": "GOOG",
262+
"name": "Google",
263+
"price": "0"
264+
265+
}
266+
]
267+
}
268+
}
269+
]
270+
}
271+
j = json_normalize(data=j['Trades'], record_path=[['general', 'stocks']],
272+
meta=[['general', 'tradeid'], ['general', 'trade_version']], errors='ignore')
273+
expected={'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
274+
'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
275+
'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
276+
'price': {0: '0', 1: '0', 2: '0', 3: '0'},
277+
'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}
278+
279+
self.assertEqual(j.fillna('').to_dict(), expected)
280+
228281
if __name__ == '__main__':
229282
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb',
230283
'--pdb-failure', '-s'], exit=False)

0 commit comments

Comments
 (0)