Skip to content

Commit c33ee73

Browse files
charlesdong1991 authored and SeeminSyed committed
BUG: non-iterable value in meta raise error in json_normalize (pandas-dev#31524)
1 parent 54e1194 commit c33ee73

File tree

3 files changed

+28
-3
lines changed

3 files changed

+28
-3
lines changed

doc/source/whatsnew/v1.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ Bug fixes
7878
**I/O**
7979

8080
- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`)
81+
- Bug in :meth:`pandas.json_normalize` when a value in the meta path is not iterable (:issue:`31507`)
8182
- Fixed pickling of ``pandas.NA``. Previously a new object was returned, which broke computations relying on ``NA`` being a singleton (:issue:`31847`)
8283
- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`).
8384

pandas/io/json/_normalize.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99

1010
from pandas._libs.writers import convert_json_to_lines
11+
from pandas._typing import Scalar
1112
from pandas.util._decorators import deprecate
1213

1314
import pandas as pd
@@ -226,14 +227,28 @@ def _json_normalize(
226227
Returns normalized data with columns prefixed with the given string.
227228
"""
228229

229-
def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
230+
def _pull_field(
231+
js: Dict[str, Any], spec: Union[List, str]
232+
) -> Union[Scalar, Iterable]:
233+
"""Internal function to pull field"""
230234
result = js # type: ignore
231235
if isinstance(spec, list):
232236
for field in spec:
233237
result = result[field]
234238
else:
235239
result = result[spec]
240+
return result
241+
242+
def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
243+
"""
244+
Internal function to pull a field for records. Similar to
245+
_pull_field, but required to return an Iterable; raises an error
246+
if the value is non-iterable and not null.
247+
"""
248+
result = _pull_field(js, spec)
236249

250+
# GH 31507 GH 30145, if result is not Iterable, raise TypeError if not
251+
# null, otherwise return an empty list
237252
if not isinstance(result, Iterable):
238253
if pd.isnull(result):
239254
result = [] # type: ignore
@@ -242,7 +257,6 @@ def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
242257
f"{js} has non iterable value {result} for path {spec}. "
243258
"Must be iterable or null."
244259
)
245-
246260
return result
247261

248262
if isinstance(data, list) and not data:
@@ -292,7 +306,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
292306
_recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1)
293307
else:
294308
for obj in data:
295-
recs = _pull_field(obj, path[0])
309+
recs = _pull_records(obj, path[0])
296310
recs = [
297311
nested_to_record(r, sep=sep, max_level=max_level)
298312
if isinstance(r, dict)

pandas/tests/io/json/test_normalize.py

+10
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,16 @@ def test_non_interable_record_path_errors(self):
486486
with pytest.raises(TypeError, match=msg):
487487
json_normalize([test_input], record_path=[test_path])
488488

489+
def test_meta_non_iterable(self):
490+
# GH 31507
491+
data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]"""
492+
493+
result = json_normalize(json.loads(data), record_path=["data"], meta=["id"])
494+
expected = DataFrame(
495+
{"one": [1], "two": [2], "id": np.array([99], dtype=object)}
496+
)
497+
tm.assert_frame_equal(result, expected)
498+
489499

490500
class TestNestedToRecord:
491501
def test_flat_stays_flat(self):

0 commit comments

Comments
 (0)