diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 807b83fa2cf69..8ce2824d779f4 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -31,6 +31,7 @@ Fixed Regressions - Fixed regression in constructors coercing NA values like ``None`` to strings when passing ``dtype=str`` (:issue:`21083`) - Regression in :func:`pivot_table` where an ordered ``Categorical`` with missing values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`) +- Fixed Regression in :func:`nested_to_record` which now flattens list of dictionaries and doesnot drop keys with value as `None` (:issue:`21356`) .. _whatsnew_0231.performance: diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 17393d458e746..b845a43b9ca9e 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -80,8 +80,6 @@ def nested_to_record(ds, prefix="", sep=".", level=0): if level != 0: # so we skip copying for top level, common case v = new_d.pop(k) new_d[newkey] = v - elif v is None: # pop the key if the value is None - new_d.pop(k) continue else: v = new_d.pop(k) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index dc34ba81f679d..395c2c90767d3 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -238,15 +238,16 @@ def test_non_ascii_key(self): tm.assert_frame_equal(result, expected) def test_missing_field(self, author_missing_data): - # GH20030: Checks for robustness of json_normalize - should - # unnest records where only the first record has a None value + # GH20030: result = json_normalize(author_missing_data) ex_data = [ - {'author_name.first': np.nan, + {'info': np.nan, + 'author_name.first': np.nan, 'author_name.last_name': np.nan, 'info.created_at': np.nan, 'info.last_updated': np.nan}, - {'author_name.first': 'Jane', + {'info': None, + 'author_name.first': 'Jane', 'author_name.last_name': 'Doe', 'info.created_at': '11/08/1993', 'info.last_updated': '26/05/2012'} @@ -351,9 +352,8 @@ def test_json_normalize_errors(self): errors='raise' ) - def test_nonetype_dropping(self): - # GH20030: Checks that None values are dropped in nested_to_record - # to prevent additional columns of nans when passed to DataFrame + def test_donot_drop_nonevalues(self): + # GH21356 data = [ {'info': None, 'author_name': @@ -367,7 +367,8 @@ def test_nonetype_dropping(self): ] result = nested_to_record(data) expected = [ - {'author_name.first': 'Smith', + {'info': None, + 'author_name.first': 'Smith', 'author_name.last_name': 'Appleseed'}, {'author_name.first': 'Jane', 'author_name.last_name': 'Doe', @@ -395,6 +396,7 @@ def test_nonetype_top_level_bottom_level(self): } result = nested_to_record(data) expected = { + 'id': None, 'location.country.state.id': None, 'location.country.state.town.info.id': None, 'location.country.state.town.info.region': None, @@ -423,6 +425,7 @@ def test_nonetype_multiple_levels(self): } result = nested_to_record(data) expected = { + 'id': None, 'location.id': None, 'location.country.id': None, 'location.country.state.id': None,