Skip to content

Commit 7d8bc0d

Browse files
dickreuter authored and jreback committed
ENH: Added errors{'raise','ignore'} for keys not found in meta for json_normalize
Author: dickreuter <[email protected]>

Closes #14583 from dickreuter/json_normalize_enhancement and squashes the following commits:

701c140 [dickreuter] adjusted formatting
3c94206 [dickreuter] shortened lines to pass linting
2028924 [dickreuter] doc changes
d298588 [dickreuter] Fixed as instructed in pull request page
bcfbf18 [dickreuter] Avoids exception when pandas.io.json.json_normalize
1 parent 4a5aec4 commit 7d8bc0d

File tree

3 files changed

+81
-2
lines changed

3 files changed

+81
-2
lines changed

doc/source/whatsnew/v0.20.0.txt

+2
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,8 @@ Other enhancements
6161
- The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`14154`)
6262
- ``pd.DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`)
6363
- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
64+
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
65+
6466

6567
.. _whatsnew_0200.api_breaking:
6668

pandas/io/json.py

+20-2
Original file line number | Diff line number | Diff line change
@@ -725,7 +725,9 @@ def nested_to_record(ds, prefix="", level=0):
725725

726726
def json_normalize(data, record_path=None, meta=None,
727727
meta_prefix=None,
728-
record_prefix=None):
728+
record_prefix=None,
729+
errors='raise'):
730+
729731
"""
730732
"Normalize" semi-structured JSON data into a flat table
731733
@@ -742,6 +744,13 @@ def json_normalize(data, record_path=None, meta=None,
742744
If True, prefix records with dotted (?) path, e.g. foo.bar.field if
743745
path to records is ['foo', 'bar']
744746
meta_prefix : string, default None
747+
errors : {'raise', 'ignore'}, default 'raise'
748+
* ignore : will ignore KeyError if keys listed in meta are not
749+
always present
750+
* raise : will raise KeyError if keys listed in meta are not
751+
always present
752+
753+
.. versionadded:: 0.20.0
745754
746755
Returns
747756
-------
@@ -841,7 +850,16 @@ def _recursive_extract(data, path, seen_meta, level=0):
841850
if level + 1 > len(val):
842851
meta_val = seen_meta[key]
843852
else:
844-
meta_val = _pull_field(obj, val[level:])
853+
try:
854+
meta_val = _pull_field(obj, val[level:])
855+
except KeyError as e:
856+
if errors == 'ignore':
857+
meta_val = np.nan
858+
else:
859+
raise \
860+
KeyError("Try running with "
861+
"errors='ignore' as key "
862+
"%s is not always present", e)
845863
meta_vals[key].append(meta_val)
846864

847865
records.extend(recs)

pandas/io/tests/json/test_json_norm.py

+59
Original file line number | Diff line number | Diff line change
@@ -225,6 +225,65 @@ def test_nested_flattens(self):
225225

226226
self.assertEqual(result, expected)
227227

228+
def test_json_normalize_errors(self):
229+
# GH14583: If meta keys are not always present
230+
# a new option to set errors='ignore' has been implemented
231+
i = {
232+
"Trades": [{
233+
"general": {
234+
"tradeid": 100,
235+
"trade_version": 1,
236+
"stocks": [{
237+
238+
"symbol": "AAPL",
239+
"name": "Apple",
240+
"price": "0"
241+
}, {
242+
"symbol": "GOOG",
243+
"name": "Google",
244+
"price": "0"
245+
}
246+
]
247+
}
248+
}, {
249+
"general": {
250+
"tradeid": 100,
251+
"stocks": [{
252+
"symbol": "AAPL",
253+
"name": "Apple",
254+
"price": "0"
255+
}, {
256+
"symbol": "GOOG",
257+
"name": "Google",
258+
"price": "0"
259+
}
260+
]
261+
}
262+
}
263+
]
264+
}
265+
j = json_normalize(data=i['Trades'],
266+
record_path=[['general', 'stocks']],
267+
meta=[['general', 'tradeid'],
268+
['general', 'trade_version']],
269+
errors='ignore')
270+
expected = {'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
271+
'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
272+
'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
273+
'price': {0: '0', 1: '0', 2: '0', 3: '0'},
274+
'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}
275+
276+
self.assertEqual(j.fillna('').to_dict(), expected)
277+
278+
self.assertRaises(KeyError,
279+
json_normalize, data=i['Trades'],
280+
record_path=[['general', 'stocks']],
281+
meta=[['general', 'tradeid'],
282+
['general', 'trade_version']],
283+
errors='raise'
284+
)
285+
286+
228287
if __name__ == '__main__':
229288
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb',
230289
'--pdb-failure', '-s'], exit=False)

0 commit comments

Comments
 (0)