Skip to content

Commit e53b620

Browse files
author
Jiang Yue
committed
add test cases
1 parent b25faf7 commit e53b620

File tree

2 files changed

+67
-3
lines changed

2 files changed

+67
-3
lines changed

pandas/io/json/normalize.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,12 @@ def nested_to_record(ds, prefix="", sep=".", level=0):
9696
return new_ds
9797

9898

99-
def json_normalize(data, fill_value=None, record_path=None, meta=None,
99+
def json_normalize(data, record_path=None, meta=None,
100100
meta_prefix=None,
101101
record_prefix=None,
102102
errors='raise',
103-
sep='.'):
103+
sep='.',
104+
fill_value=None):
104105
"""
105106
Normalize semi-structured JSON data into a flat table.
106107
@@ -151,7 +152,7 @@ def json_normalize(data, fill_value=None, record_path=None, meta=None,
151152
1 NaN NaN Regner NaN Mose NaN
152153
2 2.0 Faye Raker NaN NaN NaN NaN
153154
154-
>>> json_normalize(data, fill_value={'id' : -1})
155+
>>> json_normalize(data, fill_value={'id': -1})
155156
id name name.family name.first name.given name.last
156157
0 1 NaN NaN Coleen NaN Volk
157158
1 -1 NaN Regner NaN Mose NaN

pandas/tests/io/json/test_normalize.py

+63
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,32 @@ def deep_nested():
3838
}
3939
]
4040

41+
@pytest.fixture
def deep_nested_missing():
    """Return country -> states -> cities records with gaps.

    Some city entries omit the 'name' key and some omit the 'pop' key.
    """
    california = {
        'name': 'California',
        'cities': [
            {'name': 'San Francisco', 'pop': 12345},
            {'name': 'Los Angeles', 'pop': 12346},
        ],
    }
    ohio = {
        'name': 'Ohio',
        'cities': [
            {'name': 'Columbus', 'pop': 1234},
            {'pop': 1236},  # city without a 'name'
        ],
    }
    bayern = {
        'name': 'Bayern',
        'cities': [
            {'name': 'Munich'},  # city without a 'pop'
        ],
    }
    nordrhein_westfalen = {
        'name': 'Nordrhein-Westfalen',
        'cities': [
            {'name': 'Duesseldorf', 'pop': 1238},
            {'name': 'Koeln'},  # city without a 'pop'
        ],
    }
    return [
        {'country': 'USA', 'states': [california, ohio]},
        {'country': 'Germany', 'states': [bayern, nordrhein_westfalen]},
    ]
4167

4268
@pytest.fixture
4369
def state_data():
@@ -294,6 +320,43 @@ def test_missing_field(self, author_missing_data):
294320
expected = DataFrame(ex_data)
295321
tm.assert_frame_equal(result, expected)
296322

323+
def test_fill_value(self, author_missing_data, deep_nested_missing):
    """Exercise the ``fill_value`` argument of ``json_normalize``."""
    # GH16918
    # Flat records: in the expected frame only the column named in
    # fill_value carries the replacement; the other columns of the
    # first row stay NaN.
    result = json_normalize(
        author_missing_data,
        fill_value={'info.last_updated': '27/06/2019'})
    ex_data = [
        {'info': np.nan,
         'author_name.first': np.nan,
         'author_name.last_name': np.nan,
         'info.created_at': np.nan,
         'info.last_updated': '27/06/2019'},
        {'info': None,
         'author_name.first': 'Jane',
         'author_name.last_name': 'Doe',
         'info.created_at': '11/08/1993',
         'info.last_updated': '26/05/2012'}
    ]
    expected = DataFrame(ex_data)
    tm.assert_frame_equal(result, expected)

    # Nested records with a record path and meta columns: cities that
    # lack 'name' are expected as 'N/A', cities that lack 'pop' as 0.
    result = json_normalize(deep_nested_missing, ['states', 'cities'],
                            meta=['country', ['states', 'name']],
                            fill_value={'pop': 0, 'name': 'N/A'})

    ex_data = {'country': ['USA'] * 4 + ['Germany'] * 3,
               'states.name': ['California', 'California', 'Ohio', 'Ohio',
                               'Bayern', 'Nordrhein-Westfalen',
                               'Nordrhein-Westfalen'],
               'name': ['San Francisco', 'Los Angeles', 'Columbus',
                        'N/A', 'Munich', 'Duesseldorf', 'Koeln'],
               'pop': [12345, 12346, 1234, 1236, 0, 1238, 0]}

    # Build the expected frame in the same column order as the result
    # so that only the values are compared.
    expected = DataFrame(ex_data, columns=result.columns)
    tm.assert_frame_equal(result, expected)
359+
297360

298361
class TestNestedToRecord:
299362

0 commit comments

Comments
 (0)