Skip to content

Commit bcaa92c

Browse files
bolkedebruin and WillAyd
committed
Fix TypeError when pulling field that is None
When normalizing a JSON struct, a field can be set to None due to a schema change. Co-Authored-By: William Ayd <[email protected]>
1 parent 1d36851 commit bcaa92c

File tree

3 files changed

+38
-3
lines changed

3 files changed

+38
-3
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,7 @@ I/O
832832
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
833833
- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
834834
- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
835+
- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by ``record_path`` would raise a ``TypeError`` (:issue:`30148`)
835836

836837
Plotting
837838
^^^^^^^^

pandas/io/json/_normalize.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33

44
from collections import defaultdict
55
import copy
6-
from typing import DefaultDict, Dict, List, Optional, Union
6+
from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Union
77

88
import numpy as np
99

1010
from pandas._libs.writers import convert_json_to_lines
1111
from pandas.util._decorators import deprecate
1212

13+
import pandas as pd
1314
from pandas import DataFrame
1415

1516

@@ -229,14 +230,23 @@ def _json_normalize(
229230
Returns normalized data with columns prefixed with the given string.
230231
"""
231232

232-
def _pull_field(js, spec):
233-
result = js
233+
def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
234+
result = js # type: ignore
234235
if isinstance(spec, list):
235236
for field in spec:
236237
result = result[field]
237238
else:
238239
result = result[spec]
239240

241+
if not isinstance(result, Iterable):
242+
if pd.isnull(result):
243+
result = [] # type: ignore
244+
else:
245+
raise TypeError(
246+
f"{js} has non iterable value {result} for path {spec}. "
247+
"Must be iterable or null."
248+
)
249+
240250
return result
241251

242252
if isinstance(data, list) and not data:

pandas/tests/io/json/test_normalize.py

+24
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,30 @@ def test_nested_flattening_consistent(self):
462462
# They should be the same.
463463
tm.assert_frame_equal(df1, df2)
464464

465+
def test_nonetype_record_path(self, nulls_fixture):
466+
# see gh-30148
467+
# should not raise TypeError
468+
result = json_normalize(
469+
[
470+
{"state": "Texas", "info": nulls_fixture},
471+
{"state": "Florida", "info": [{"i": 2}]},
472+
],
473+
record_path=["info"],
474+
)
475+
expected = DataFrame({"i": 2}, index=[0])
476+
tm.assert_equal(result, expected)
477+
478+
def test_non_interable_record_path_errors(self):
479+
# see gh-30148
480+
test_input = {"state": "Texas", "info": 1}
481+
test_path = "info"
482+
msg = (
483+
f"{test_input} has non iterable value 1 for path {test_path}. "
484+
"Must be iterable or null."
485+
)
486+
with pytest.raises(TypeError, match=msg):
487+
json_normalize([test_input], record_path=[test_path])
488+
465489

466490
class TestNestedToRecord:
467491
def test_flat_stays_flat(self):

0 commit comments

Comments
 (0)