Fix TypeError when pulling field that is None

bolkedebruin · WillAyd · bolkedebruin · commit b0cf30d5f304 · 2019-12-10T20:53:41.000+01:00
When normalizing a JSON structure, a field can be set to None
due to a schema change. Treat such a field as containing no records
instead of raising a TypeError.

Co-Authored-By: William Ayd <william.ayd@icloud.com>
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -774,7 +774,7 @@ I/O
 - Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
 - Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
 - :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
--
+- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by ``record_path`` would raise a ``TypeError`` (:issue:`30148`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
@@ -10,6 +10,7 @@
 from pandas._libs.writers import convert_json_to_lines
 
 from pandas import DataFrame
+from pandas.api.types import is_integer, is_scalar
 
 
 def convert_to_line_delimits(s):
@@ -234,7 +235,10 @@ def _pull_field(js, spec):
             for field in spec:
                 result = result[field]
         else:
-            result = result[spec]
+            try:
+                result = result[spec]
+            except TypeError:
+                result = None
 
         return result
 
@@ -286,12 +290,15 @@ def _recursive_extract(data, path, seen_meta, level=0):
         else:
             for obj in data:
                 recs = _pull_field(obj, path[0])
-                recs = [
-                    nested_to_record(r, sep=sep, max_level=max_level)
-                    if isinstance(r, dict)
-                    else r
-                    for r in recs
-                ]
+                if not is_scalar(recs):
+                    recs = [
+                        nested_to_record(r, sep=sep, max_level=max_level)
+                        if isinstance(r, dict)
+                        else r
+                        for r in recs
+                    ]
+                else:
+                    recs = []
 
                 # For repeating the metadata later
                 lengths.append(len(recs))
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
@@ -463,6 +463,19 @@ def test_nested_flattening_consistent(self):
         # They should be the same.
         tm.assert_frame_equal(df1, df2)
 
+    def test_nonetype_record_path(self, nulls_fixture):
+        # see gh-30148
+        # should not raise TypeError
+        df1 = json_normalize(
+            [
+                {"state": "Texas", "info": nulls_fixture},
+                {"state": "Florida", "info": [{"i": 2}]},
+            ],
+            record_path=["info"],
+        )
+        df2 = DataFrame({"i": 2}, index=[0])
+        tm.assert_equal(df1, df2)
+
 
 class TestNestedToRecord:
     def test_flat_stays_flat(self):