diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 462f243f14494..61d730f7a285b 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -75,6 +75,7 @@ Bug fixes **I/O** - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) +- Bug in :meth:`pandas.json_normalize` when value in meta path is not iterable (:issue:`31507`) - Fixed pickling of ``pandas.NA``. Previously a new object was returned, which broke computations relying on ``NA`` being a singleton (:issue:`31847`) - Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`). diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 4b153d3cb69bf..6e68c1cf5e27e 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -8,6 +8,7 @@ import numpy as np from pandas._libs.writers import convert_json_to_lines +from pandas._typing import Scalar from pandas.util._decorators import deprecate import pandas as pd @@ -226,14 +227,28 @@ def _json_normalize( Returns normalized data with columns prefixed with the given string. """ - def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: + def _pull_field( + js: Dict[str, Any], spec: Union[List, str] + ) -> Union[Scalar, Iterable]: + """Internal function to pull field""" result = js # type: ignore if isinstance(spec, list): for field in spec: result = result[field] else: result = result[spec] + return result + + def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: + """ + Interal function to pull field for records, and similar to + _pull_field, but require to return Iterable. And will raise error + if has non iterable value. + """ + result = _pull_field(js, spec) + # GH 31507 GH 30145, if result is not Iterable, raise TypeError if not + # null, otherwise return an empty list if not isinstance(result, Iterable): if pd.isnull(result): result = [] # type: ignore @@ -242,7 +257,6 @@ def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: f"{js} has non iterable value {result} for path {spec}. " "Must be iterable or null." ) - return result if isinstance(data, list) and not data: @@ -292,7 +306,7 @@ def _recursive_extract(data, path, seen_meta, level=0): _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) else: for obj in data: - recs = _pull_field(obj, path[0]) + recs = _pull_records(obj, path[0]) recs = [ nested_to_record(r, sep=sep, max_level=max_level) if isinstance(r, dict) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 91b204ed41ebc..b7a9918ff46da 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -486,6 +486,16 @@ def test_non_interable_record_path_errors(self): with pytest.raises(TypeError, match=msg): json_normalize([test_input], record_path=[test_path]) + def test_meta_non_iterable(self): + # GH 31507 + data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]""" + + result = json_normalize(json.loads(data), record_path=["data"], meta=["id"]) + expected = DataFrame( + {"one": [1], "two": [2], "id": np.array([99], dtype=object)} + ) + tm.assert_frame_equal(result, expected) + class TestNestedToRecord: def test_flat_stays_flat(self):