Fix TypeError when pulling field that is None

bolkedebruin · WillAyd · bolkedebruin · commit bcaa92c293fb · 2019-12-28T14:11:35.000+01:00
When normalizing a JSON structure, the field at the record path can be
None, for example due to a schema change.

Co-Authored-By: William Ayd <william.ayd@icloud.com>
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -832,6 +832,7 @@ I/O
 - Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
 - Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
 - :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
+- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by ``record_path`` would raise a ``TypeError`` (:issue:`30148`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
@@ -3,13 +3,14 @@
 
 from collections import defaultdict
 import copy
-from typing import DefaultDict, Dict, List, Optional, Union
+from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Union
 
 import numpy as np
 
 from pandas._libs.writers import convert_json_to_lines
 from pandas.util._decorators import deprecate
 
+import pandas as pd
 from pandas import DataFrame
 
 
@@ -229,14 +230,23 @@ def _json_normalize(
     Returns normalized data with columns prefixed with the given string.
     """
 
-    def _pull_field(js, spec):
-        result = js
+    def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
+        result = js  # type: ignore
         if isinstance(spec, list):
             for field in spec:
                 result = result[field]
         else:
             result = result[spec]
 
+        if not isinstance(result, Iterable):
+            if pd.isnull(result):
+                result = []  # type: ignore
+            else:
+                raise TypeError(
+                    f"{js} has non iterable value {result} for path {spec}. "
+                    "Must be iterable or null."
+                )
+
         return result
 
     if isinstance(data, list) and not data:
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
@@ -462,6 +462,30 @@ def test_nested_flattening_consistent(self):
         # They should be the same.
         tm.assert_frame_equal(df1, df2)
 
+    def test_nonetype_record_path(self, nulls_fixture):
+        # see gh-30148
+        # should not raise TypeError
+        result = json_normalize(
+            [
+                {"state": "Texas", "info": nulls_fixture},
+                {"state": "Florida", "info": [{"i": 2}]},
+            ],
+            record_path=["info"],
+        )
+        expected = DataFrame({"i": 2}, index=[0])
+        tm.assert_equal(result, expected)
+
+    def test_non_interable_record_path_errors(self):
+        # see gh-30148
+        test_input = {"state": "Texas", "info": 1}
+        test_path = "info"
+        msg = (
+            f"{test_input} has non iterable value 1 for path {test_path}. "
+            "Must be iterable or null."
+        )
+        with pytest.raises(TypeError, match=msg):
+            json_normalize([test_input], record_path=[test_path])
+
 
 class TestNestedToRecord:
     def test_flat_stays_flat(self):