Skip to content

Commit e232067

Browse files
committed
BUG: fixed json_normalize for subrecords with NoneTypes
TST: additional coverage for the test cases
DOC: added changes to whatsnew/v0.23.0.txt
BUG: changed how nan is declared for backward compatibility with Python 2.7
1 parent 670c2e4 commit e232067

File tree

3 files changed

+54
-1
lines changed

3 files changed

+54
-1
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -979,6 +979,7 @@ I/O
979979
- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`)
980980
- Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`)
981981
- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`)
982+
- Bug in :meth:`pandas.io.json.json_normalize` where subrecords are not properly normalized if any subrecord value is ``None`` (:issue:`20030`)
982983

983984
Plotting
984985
^^^^^^^^

pandas/io/json/normalize.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ def nested_to_record(ds, prefix="", sep=".", level=0):
8080
if level != 0: # so we skip copying for top level, common case
8181
v = new_d.pop(k)
8282
new_d[newkey] = v
83+
if v is None: # pop the key if the value is None
84+
new_d.pop(k)
8385
continue
8486
else:
8587
v = new_d.pop(k)
@@ -189,7 +191,8 @@ def _pull_field(js, spec):
189191
data = [data]
190192

191193
if record_path is None:
192-
if any(isinstance(x, dict) for x in compat.itervalues(data[0])):
194+
if any([[isinstance(x, dict)
195+
for x in compat.itervalues(y)] for y in data]):
193196
# naive normalization, this is idempotent for flat records
194197
# and potentially will inflate the data considerably for
195198
# deeply nested structures:

pandas/tests/io/json/test_normalize.py

+49
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,17 @@ def state_data():
5454
'state': 'Ohio'}]
5555

5656

57+
@pytest.fixture
58+
def author_missing_data():
59+
return [
60+
{'info': None},
61+
{'info':
62+
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
63+
'author_name':
64+
{'first': 'Jane', 'last_name': 'Doe'}
65+
}]
66+
67+
5768
class TestJSONNormalize(object):
5869

5970
def test_simple_records(self):
@@ -226,6 +237,21 @@ def test_non_ascii_key(self):
226237
result = json_normalize(json.loads(testjson))
227238
tm.assert_frame_equal(result, expected)
228239

240+
def test_missing_field(self, author_missing_data):
241+
result = json_normalize(author_missing_data)
242+
ex_data = [
243+
{'author_name.first': float('nan'),
244+
'author_name.last_name': float('nan'),
245+
'info.created_at': float('nan'),
246+
'info.last_updated': float('nan')},
247+
{'author_name.first': 'Jane',
248+
'author_name.last_name': 'Doe',
249+
'info.created_at': '11/08/1993',
250+
'info.last_updated': '26/05/2012'}
251+
]
252+
expected = DataFrame(ex_data)
253+
tm.assert_frame_equal(result, expected)
254+
229255

230256
class TestNestedToRecord(object):
231257

@@ -322,3 +348,26 @@ def test_json_normalize_errors(self):
322348
['general', 'trade_version']],
323349
errors='raise'
324350
)
351+
352+
def test_nonetype_dropping(self):
353+
data = [
354+
{'info': None,
355+
'author_name':
356+
{'first': 'Smith', 'last_name': 'Appleseed'}
357+
},
358+
{'info':
359+
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
360+
'author_name':
361+
{'first': 'Jane', 'last_name': 'Doe'}
362+
}
363+
]
364+
result = nested_to_record(data)
365+
expected = [
366+
{'author_name.first': 'Smith',
367+
'author_name.last_name': 'Appleseed'},
368+
{'author_name.first': 'Jane',
369+
'author_name.last_name': 'Doe',
370+
'info.created_at': '11/08/1993',
371+
'info.last_updated': '26/05/2012'}]
372+
373+
assert result == expected

0 commit comments

Comments
 (0)