Commit 9848837

Author: dickreuter

Added keyword errors {'raise'|'ignore'}

Added documentation. Shortened what's new. Removed commas in dictionary for linting compatibility.

1 parent 8928270 · commit 9848837

2 files changed: +31, -20 lines

pandas/io/json.py (+19, -16)

@@ -22,10 +22,9 @@
 def to_json(path_or_buf, obj, orient=None, date_format='epoch',
             double_precision=10, force_ascii=True, date_unit='ms',
             default_handler=None, lines=False):
-
     if lines and orient != 'records':
-        raise ValueError(
-            "'lines' keyword only valid when 'orient' is records")
+        raise ValueError(
+            "'lines' keyword only valid when 'orient' is records")
 
     if isinstance(obj, Series):
         s = SeriesWriter(
@@ -53,7 +52,6 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
 
 
 class Writer(object):
-
     def __init__(self, obj, orient, date_format, double_precision,
                  ensure_ascii, date_unit, default_handler=None):
         self.obj = obj
@@ -291,7 +289,6 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
 
 
 class Parser(object):
-
     _STAMP_UNITS = ('s', 'ms', 'us', 'ns')
     _MIN_STAMPS = {
         's': long(31536000),
@@ -492,8 +489,8 @@ def _parse_no_numpy(self):
         if orient == "split":
             decoded = dict((str(k), v)
                            for k, v in compat.iteritems(loads(
-                               json,
-                               precise_float=self.precise_float)))
+                               json,
+                               precise_float=self.precise_float)))
             self.check_keys_split(decoded)
             self.obj = Series(dtype=None, **decoded)
         else:
@@ -567,8 +564,8 @@ def _parse_no_numpy(self):
         elif orient == "split":
             decoded = dict((str(k), v)
                            for k, v in compat.iteritems(loads(
-                               json,
-                               precise_float=self.precise_float)))
+                               json,
+                               precise_float=self.precise_float)))
             self.check_keys_split(decoded)
             self.obj = DataFrame(dtype=None, **decoded)
         elif orient == "index":
@@ -595,7 +592,6 @@ def _process_converter(self, f, filt=None):
             new_obj[i] = c
 
         if needs_new_obj:
-
             # possibly handle dup columns
             new_obj = DataFrame(new_obj, index=self.obj.index)
             new_obj.columns = self.obj.columns
@@ -628,9 +624,9 @@ def is_ok(col):
             col_lower = col.lower()
             if (col_lower.endswith('_at') or
                     col_lower.endswith('_time') or
-                    col_lower == 'modified' or
-                    col_lower == 'date' or
-                    col_lower == 'datetime' or
+                    col_lower == 'modified' or
+                    col_lower == 'date' or
+                    col_lower == 'datetime' or
                     col_lower.startswith('timestamp')):
                 return True
             return False
@@ -640,6 +636,7 @@ def is_ok(col):
            lambda col, c: ((self.keep_default_dates and is_ok(col)) or
                            col in convert_dates))
 
+
 # ---------------------------------------------------------------------
 # JSON normalization routines
 
@@ -723,7 +720,7 @@ def nested_to_record(ds, prefix="", level=0):
 
 def json_normalize(data, record_path=None, meta=None,
                    meta_prefix=None,
-                   record_prefix=None):
+                   record_prefix=None, errors='raise'):
     """
     "Normalize" semi-structured JSON data into a flat table
 
@@ -740,6 +737,8 @@ def json_normalize(data, record_path=None, meta=None,
        If True, prefix records with dotted (?) path, e.g. foo.bar.field if
        path to records is ['foo', 'bar']
     meta_prefix : string, default None
+    errors : {'raise', 'ignore'}, default 'raise'
+        * ignore : will ignore KeyError if keys listed in meta are not always present
 
     Returns
     -------
@@ -775,6 +774,7 @@ def json_normalize(data, record_path=None, meta=None,
     4   Cuyahoga   1337   John Kasich   Ohio   OH
 
     """
+
     def _pull_field(js, spec):
         result = js
         if isinstance(spec, list):
@@ -841,8 +841,11 @@ def _recursive_extract(data, path, seen_meta, level=0):
                    else:
                        try:
                            meta_val = _pull_field(obj, val[level:])
-                        except:
-                            meta_val = np.nan
+                        except KeyError as e:
+                            if errors == 'ignore':
+                                meta_val = np.nan
+                            else:
+                                raise KeyError("Try running with errors='ignore' as the following key may not always be present: " + str(e))
                    meta_vals[key].append(meta_val)
 
                records.extend(recs)
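
The net effect of this change is that json_normalize gains an errors keyword: with errors='ignore', meta keys that are missing from some records become NaN instead of raising. A minimal usage sketch (hypothetical trade data modelled on the new test below; it assumes the pandas.io.json import path used in this tree):

from pandas.io.json import json_normalize

# Hypothetical input: 'trade_version' is present only in the first trade.
trades = [
    {"general": {"tradeid": 100, "trade_version": 1,
                 "stocks": [{"symbol": "AAPL", "price": "0"},
                            {"symbol": "GOOG", "price": "0"}]}},
    {"general": {"tradeid": 101,
                 "stocks": [{"symbol": "AAPL", "price": "0"},
                            {"symbol": "GOOG", "price": "0"}]}},
]

# errors='ignore' fills the missing meta value with NaN instead of raising.
flat = json_normalize(trades, record_path=[['general', 'stocks']],
                      meta=[['general', 'tradeid'], ['general', 'trade_version']],
                      errors='ignore')
print(flat[['symbol', 'general.tradeid', 'general.trade_version']])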

pandas/io/tests/json/test_json_norm.py (+12, -4)

@@ -225,7 +225,9 @@ def test_nested_flattens(self):
 
         self.assertEqual(result, expected)
 
+
     def test_json_normalise_fix(self):
+        # issue 14505
         j = {
             "Trades": [{
                 "general": {
@@ -245,7 +247,7 @@ def test_json_normalise_fix(self):
 
                    }
                ]
-            },
+            }
        }, {
            "general": {
                "tradeid": 100,
@@ -262,13 +264,19 @@ def test_json_normalise_fix(self):
 
                    }
                ]
-            },
+            }
        }
        ]
        }
        j = json_normalize(data=j['Trades'], record_path=[['general', 'stocks']],
-                          meta=[['general', 'tradeid'], ['general', 'trade_version']])
-        self.assertEqual(len(j), 4)
+                          meta=[['general', 'tradeid'], ['general', 'trade_version']], errors='ignore')
+        expected = {'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
+                    'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
+                    'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
+                    'price': {0: '0', 1: '0', 2: '0', 3: '0'},
+                    'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}
+
+        self.assertEqual(j.fillna('').to_dict(), expected)
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb',
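
With the default errors='raise', the same situation now surfaces as a KeyError whose message points the user at errors='ignore' (a sketch under the same assumptions as the example above):

from pandas.io.json import json_normalize

# Hypothetical records: the second one lacks the 'trade_version' meta key.
trades = [
    {"general": {"tradeid": 100, "trade_version": 1,
                 "stocks": [{"symbol": "AAPL", "price": "0"}]}},
    {"general": {"tradeid": 101,
                 "stocks": [{"symbol": "GOOG", "price": "0"}]}},
]

try:
    json_normalize(trades, record_path=[['general', 'stocks']],
                   meta=[['general', 'tradeid'], ['general', 'trade_version']])
except KeyError as err:
    # The new message suggests retrying with errors='ignore'.
    print(err)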
