Skip to content

Commit 01882ba

Browse files
aerymilts authored and jreback committed
BUG: fixed json_normalize for subrecords with NoneTypes (pandas-dev#20030) (pandas-dev#20399)
1 parent 217174b commit 01882ba

File tree

3 files changed

+58
-1
lines changed

3 files changed

+58
-1
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -981,6 +981,7 @@ I/O
981981
- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`)
982982
- Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`)
983983
- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`)
984+
- Bug in :meth:`pandas.io.json.json_normalize` where subrecords were not properly normalized if any subrecord's value was ``None`` (:issue:`20030`)
984985

985986
Plotting
986987
^^^^^^^^

pandas/io/json/normalize.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,8 @@ def nested_to_record(ds, prefix="", sep=".", level=0):
8080
if level != 0: # so we skip copying for top level, common case
8181
v = new_d.pop(k)
8282
new_d[newkey] = v
83+
if v is None: # pop the key if the value is None
84+
new_d.pop(k)
8385
continue
8486
else:
8587
v = new_d.pop(k)
@@ -189,7 +191,8 @@ def _pull_field(js, spec):
189191
data = [data]
190192

191193
if record_path is None:
192-
if any(isinstance(x, dict) for x in compat.itervalues(data[0])):
194+
if any([[isinstance(x, dict)
195+
for x in compat.itervalues(y)] for y in data]):
193196
# naive normalization, this is idempotent for flat records
194197
# and potentially will inflate the data considerably for
195198
# deeply nested structures:

pandas/tests/io/json/test_normalize.py

+53
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,17 @@ def state_data():
5454
'state': 'Ohio'}]
5555

5656

57+
@pytest.fixture
58+
def author_missing_data():
59+
return [
60+
{'info': None},
61+
{'info':
62+
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
63+
'author_name':
64+
{'first': 'Jane', 'last_name': 'Doe'}
65+
}]
66+
67+
5768
class TestJSONNormalize(object):
5869

5970
def test_simple_records(self):
@@ -226,6 +237,23 @@ def test_non_ascii_key(self):
226237
result = json_normalize(json.loads(testjson))
227238
tm.assert_frame_equal(result, expected)
228239

240+
def test_missing_field(self, author_missing_data):
241+
# GH20030: Checks for robustness of json_normalize - should
242+
# unnest records where only the first record has a None value
243+
result = json_normalize(author_missing_data)
244+
ex_data = [
245+
{'author_name.first': np.nan,
246+
'author_name.last_name': np.nan,
247+
'info.created_at': np.nan,
248+
'info.last_updated': np.nan},
249+
{'author_name.first': 'Jane',
250+
'author_name.last_name': 'Doe',
251+
'info.created_at': '11/08/1993',
252+
'info.last_updated': '26/05/2012'}
253+
]
254+
expected = DataFrame(ex_data)
255+
tm.assert_frame_equal(result, expected)
256+
229257

230258
class TestNestedToRecord(object):
231259

@@ -322,3 +350,28 @@ def test_json_normalize_errors(self):
322350
['general', 'trade_version']],
323351
errors='raise'
324352
)
353+
354+
def test_nonetype_dropping(self):
355+
# GH20030: Checks that None values are dropped in nested_to_record
356+
# to prevent additional columns of nans when passed to DataFrame
357+
data = [
358+
{'info': None,
359+
'author_name':
360+
{'first': 'Smith', 'last_name': 'Appleseed'}
361+
},
362+
{'info':
363+
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
364+
'author_name':
365+
{'first': 'Jane', 'last_name': 'Doe'}
366+
}
367+
]
368+
result = nested_to_record(data)
369+
expected = [
370+
{'author_name.first': 'Smith',
371+
'author_name.last_name': 'Appleseed'},
372+
{'author_name.first': 'Jane',
373+
'author_name.last_name': 'Doe',
374+
'info.created_at': '11/08/1993',
375+
'info.last_updated': '26/05/2012'}]
376+
377+
assert result == expected

0 commit comments

Comments
 (0)