diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 974527624a312..e29cb0a5a2626 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -97,6 +97,7 @@ I/O - Bug in IO methods specifying ``compression='zip'`` which produced uncompressed zip archives (:issue:`17778`, :issue:`21144`) - Bug in :meth:`DataFrame.to_stata` which prevented exporting DataFrames to buffers and most file-like objects (:issue:`21041`) +- Bug in :meth:`pandas.io.json.json_normalize` which raised an error when nested levels of the JSON input contained ``None`` values (:issue:`21158`) - Bug in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` causes encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`) - Bug in :meth:`read_stata` and :class:`StataReader` which did not correctly decode utf-8 strings on Python 3 from Stata 14 files (dta version 118) (:issue:`21244`) - diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 549204abd3caf..17393d458e746 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -80,7 +80,7 @@ def nested_to_record(ds, prefix="", sep=".", level=0): if level != 0: # so we skip copying for top level, common case v = new_d.pop(k) new_d[newkey] = v - if v is None: # pop the key if the value is None + elif v is None: # pop the key if the value is None new_d.pop(k) continue else: diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 0fabaf747b6de..dc34ba81f679d 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -375,3 +375,59 @@ def test_nonetype_dropping(self): 'info.last_updated': '26/05/2012'}] assert result == expected + + def test_nonetype_top_level_bottom_level(self): + # GH21158: If inner level json has a key with a null value + # make sure it doesn't call new_d.pop twice and raise + data = { + "id": None, + "location": { + "country": { + "state": { + "id": 
None, + "town.info": { + "id": None, + "region": None, + "x": 49.151580810546875, + "y": -33.148521423339844, + "z": 27.572303771972656}}} + } + } + result = nested_to_record(data) + expected = { + 'location.country.state.id': None, + 'location.country.state.town.info.id': None, + 'location.country.state.town.info.region': None, + 'location.country.state.town.info.x': 49.151580810546875, + 'location.country.state.town.info.y': -33.148521423339844, + 'location.country.state.town.info.z': 27.572303771972656} + assert result == expected + + def test_nonetype_multiple_levels(self): + # GH21158: If inner level json has a key with a null value + # make sure it doesn't call new_d.pop twice and raise + data = { + "id": None, + "location": { + "id": None, + "country": { + "id": None, + "state": { + "id": None, + "town.info": { + "region": None, + "x": 49.151580810546875, + "y": -33.148521423339844, + "z": 27.572303771972656}}} + } + } + result = nested_to_record(data) + expected = { + 'location.id': None, + 'location.country.id': None, + 'location.country.state.id': None, + 'location.country.state.town.info.region': None, + 'location.country.state.town.info.x': 49.151580810546875, + 'location.country.state.town.info.y': -33.148521423339844, + 'location.country.state.town.info.z': 27.572303771972656} + assert result == expected