Skip to content

Commit d5d6430

Browse files
charlesdong1991 authored and meeseeksmachine committed
Backport PR pandas-dev#31524: BUG: non-iterable value in meta raise error in json_normalize
1 parent 931c65f commit d5d6430

File tree

3 files changed

+28
-3
lines changed

3 files changed

+28
-3
lines changed

doc/source/whatsnew/v1.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ Bug fixes
7878
**I/O**
7979

8080
- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`)
81+
- Bug in :meth:`pandas.json_normalize` where a non-iterable value in the meta path raised an error (:issue:`31507`)
8182
- Fixed pickling of ``pandas.NA``. Previously a new object was returned, which broke computations relying on ``NA`` being a singleton (:issue:`31847`)
8283
- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`).
8384

pandas/io/json/_normalize.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99

1010
from pandas._libs.writers import convert_json_to_lines
11+
from pandas._typing import Scalar
1112
from pandas.util._decorators import deprecate
1213

1314
import pandas as pd
@@ -230,14 +231,28 @@ def _json_normalize(
230231
Returns normalized data with columns prefixed with the given string.
231232
"""
232233

233-
def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
234+
def _pull_field(
235+
js: Dict[str, Any], spec: Union[List, str]
236+
) -> Union[Scalar, Iterable]:
237+
"""Internal function to pull field"""
234238
result = js # type: ignore
235239
if isinstance(spec, list):
236240
for field in spec:
237241
result = result[field]
238242
else:
239243
result = result[spec]
244+
return result
245+
246+
def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
247+
"""
248+
Internal function to pull field for records, and similar to
249+
_pull_field, but require to return Iterable. And will raise error
250+
if has non iterable value.
251+
"""
252+
result = _pull_field(js, spec)
240253

254+
# GH 31507 GH 30145, if result is not Iterable, raise TypeError if not
255+
# null, otherwise return an empty list
241256
if not isinstance(result, Iterable):
242257
if pd.isnull(result):
243258
result = [] # type: ignore
@@ -246,7 +261,6 @@ def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
246261
f"{js} has non iterable value {result} for path {spec}. "
247262
"Must be iterable or null."
248263
)
249-
250264
return result
251265

252266
if isinstance(data, list) and not data:
@@ -296,7 +310,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
296310
_recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1)
297311
else:
298312
for obj in data:
299-
recs = _pull_field(obj, path[0])
313+
recs = _pull_records(obj, path[0])
300314
recs = [
301315
nested_to_record(r, sep=sep, max_level=max_level)
302316
if isinstance(r, dict)

pandas/tests/io/json/test_normalize.py

+10
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,16 @@ def test_non_interable_record_path_errors(self):
486486
with pytest.raises(TypeError, match=msg):
487487
json_normalize([test_input], record_path=[test_path])
488488

489+
def test_meta_non_iterable(self):
490+
# GH 31507
491+
data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]"""
492+
493+
result = json_normalize(json.loads(data), record_path=["data"], meta=["id"])
494+
expected = DataFrame(
495+
{"one": [1], "two": [2], "id": np.array([99], dtype=object)}
496+
)
497+
tm.assert_frame_equal(result, expected)
498+
489499

490500
class TestNestedToRecord:
491501
def test_flat_stays_flat(self):

0 commit comments

Comments
 (0)