Skip to content

Commit 88260ea

Browse files
daminisatya authored and TomAugspurger committed
Fix #21356: JSON nested_to_record Silently Drops Top-Level None Values (#21363)
(cherry picked from commit ff26632)
1 parent 53f2d9f commit 88260ea

File tree

3 files changed

+72
-10
lines changed

3 files changed

+72
-10
lines changed

doc/source/whatsnew/v0.23.1.txt

+5
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ Fixed Regressions
2727
- Bug in :meth:`~DataFrame.to_csv` causes encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`)
2828
- Bug preventing pandas from being importable with -OO optimization (:issue:`21071`)
2929
- Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when the individual categories are iterable and `value` is an iterable (:issue:`21097`, :issue:`19788`)
30+
- Fixed regression in constructors coercing NA values like ``None`` to strings when passing ``dtype=str`` (:issue:`21083`)
31+
- Regression in :func:`pivot_table` where an ordered ``Categorical`` with missing
32+
values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`)
33+
- Fixed regression in :func:`nested_to_record` which now flattens a list of dictionaries and does not drop keys whose value is ``None`` (:issue:`21356`)
34+
3035

3136
.. _whatsnew_0231.performance:
3237

pandas/io/json/normalize.py

-2
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,6 @@ def nested_to_record(ds, prefix="", sep=".", level=0):
8080
if level != 0: # so we skip copying for top level, common case
8181
v = new_d.pop(k)
8282
new_d[newkey] = v
83-
if v is None: # pop the key if the value is None
84-
new_d.pop(k)
8583
continue
8684
else:
8785
v = new_d.pop(k)

pandas/tests/io/json/test_normalize.py

+67-8
Original file line numberDiff line numberDiff line change
@@ -238,15 +238,16 @@ def test_non_ascii_key(self):
238238
tm.assert_frame_equal(result, expected)
239239

240240
def test_missing_field(self, author_missing_data):
241-
# GH20030: Checks for robustness of json_normalize - should
242-
# unnest records where only the first record has a None value
241+
# GH20030:
243242
result = json_normalize(author_missing_data)
244243
ex_data = [
245-
{'author_name.first': np.nan,
244+
{'info': np.nan,
245+
'author_name.first': np.nan,
246246
'author_name.last_name': np.nan,
247247
'info.created_at': np.nan,
248248
'info.last_updated': np.nan},
249-
{'author_name.first': 'Jane',
249+
{'info': None,
250+
'author_name.first': 'Jane',
250251
'author_name.last_name': 'Doe',
251252
'info.created_at': '11/08/1993',
252253
'info.last_updated': '26/05/2012'}
@@ -351,9 +352,8 @@ def test_json_normalize_errors(self):
351352
errors='raise'
352353
)
353354

354-
def test_nonetype_dropping(self):
355-
# GH20030: Checks that None values are dropped in nested_to_record
356-
# to prevent additional columns of nans when passed to DataFrame
355+
def test_donot_drop_nonevalues(self):
356+
# GH21356
357357
data = [
358358
{'info': None,
359359
'author_name':
@@ -367,11 +367,70 @@ def test_nonetype_dropping(self):
367367
]
368368
result = nested_to_record(data)
369369
expected = [
370-
{'author_name.first': 'Smith',
370+
{'info': None,
371+
'author_name.first': 'Smith',
371372
'author_name.last_name': 'Appleseed'},
372373
{'author_name.first': 'Jane',
373374
'author_name.last_name': 'Doe',
374375
'info.created_at': '11/08/1993',
375376
'info.last_updated': '26/05/2012'}]
376377

377378
assert result == expected
379+
380+
def test_nonetype_top_level_bottom_level(self):
381+
# GH21158: If inner level json has a key with a null value
382+
# make sure it doesn't call new_d.pop twice and raise an exception
383+
data = {
384+
"id": None,
385+
"location": {
386+
"country": {
387+
"state": {
388+
"id": None,
389+
"town.info": {
390+
"id": None,
391+
"region": None,
392+
"x": 49.151580810546875,
393+
"y": -33.148521423339844,
394+
"z": 27.572303771972656}}}
395+
}
396+
}
397+
result = nested_to_record(data)
398+
expected = {
399+
'id': None,
400+
'location.country.state.id': None,
401+
'location.country.state.town.info.id': None,
402+
'location.country.state.town.info.region': None,
403+
'location.country.state.town.info.x': 49.151580810546875,
404+
'location.country.state.town.info.y': -33.148521423339844,
405+
'location.country.state.town.info.z': 27.572303771972656}
406+
assert result == expected
407+
408+
def test_nonetype_multiple_levels(self):
409+
# GH21158: If inner level json has a key with a null value
410+
# make sure it doesn't call new_d.pop twice and raise an exception
411+
data = {
412+
"id": None,
413+
"location": {
414+
"id": None,
415+
"country": {
416+
"id": None,
417+
"state": {
418+
"id": None,
419+
"town.info": {
420+
"region": None,
421+
"x": 49.151580810546875,
422+
"y": -33.148521423339844,
423+
"z": 27.572303771972656}}}
424+
}
425+
}
426+
result = nested_to_record(data)
427+
expected = {
428+
'id': None,
429+
'location.id': None,
430+
'location.country.id': None,
431+
'location.country.state.id': None,
432+
'location.country.state.town.info.region': None,
433+
'location.country.state.town.info.x': 49.151580810546875,
434+
'location.country.state.town.info.y': -33.148521423339844,
435+
'location.country.state.town.info.z': 27.572303771972656}
436+
assert result == expected

0 commit comments

Comments
 (0)