Skip to content

Commit 1f1917d

Browse files
committed
BUG: closes #44312: fixes unwanted TypeError when a nested metadata field is missing
1 parent a3bcbf8 commit 1f1917d

File tree

3 files changed

+30
-0
lines changed

3 files changed

+30
-0
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,7 @@ I/O
569569
- Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
570570
- Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`)
571571
- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
572+
- Bug in :func:`json_normalize` where reading data with missing multi-level metadata would not respect ``errors="ignore"`` (:issue:`44312`)
572573
- Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
573574
- Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`)
574575
-

pandas/io/json/_normalize.py

+2
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,8 @@ def _pull_field(
389389
try:
390390
if isinstance(spec, list):
391391
for field in spec:
392+
if result is None:
393+
raise KeyError(field)
392394
result = result[field]
393395
else:
394396
result = result[spec]

pandas/tests/io/json/test_normalize.py

+27
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,33 @@ def test_missing_meta(self, missing_metadata):
634634
expected = DataFrame(ex_data, columns=columns)
635635
tm.assert_frame_equal(result, expected)
636636

637+
def test_missing_nested_meta(self):
638+
# GH44312
639+
# If errors="ignore" and nested metadata is null, we should return nan
640+
data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]}
641+
result = json_normalize(
642+
data,
643+
record_path="value",
644+
meta=["meta", ["nested_meta", "leaf"]],
645+
errors="ignore",
646+
)
647+
ex_data = [[1, "foo", np.nan], [2, "foo", np.nan]]
648+
columns = ["rec", "meta", "nested_meta.leaf"]
649+
expected = DataFrame(ex_data, columns=columns).astype(
650+
{"nested_meta.leaf": object}
651+
)
652+
tm.assert_frame_equal(result, expected)
653+
654+
# If errors="raise" and nested metadata is null, we should raise with the
655+
# key of the first missing level
656+
with pytest.raises(KeyError, match="'leaf' not found"):
657+
json_normalize(
658+
data,
659+
record_path="value",
660+
meta=["meta", ["nested_meta", "leaf"]],
661+
errors="raise",
662+
)
663+
637664
def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata):
638665
# GH41876
639666
# Ensure errors='raise' works as intended even when a record_path of length

0 commit comments

Comments
 (0)