diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 36b2aa3c28da5..8fac2f7737fc3 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -374,6 +374,7 @@ I/O
 ^^^
 - :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`)
 - :meth:`DataFrame.to_sql` now raising ``ValueError`` when the name param is left empty while using SQLAlchemy to connect (:issue:`52675`)
+- Bug in :func:`json_normalize` where metadata fields of list type could not be parsed (:issue:`37782`)
 - Bug in :func:`read_hdf` not properly closing store after a ``IndexError`` is raised (:issue:`52781`)
 - Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`)
 - Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
index 0937828b00e38..459b4035627cc 100644
--- a/pandas/io/json/_normalize.py
+++ b/pandas/io/json/_normalize.py
@@ -535,5 +535,15 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None:
             raise ValueError(
                 f"Conflicting metadata name {k}, need distinguishing prefix "
             )
-        result[k] = np.array(v, dtype=object).repeat(lengths)
+        # GH 37782: when the metadata values are equal-length lists, np.array()
+        # builds a 2-D array, and repeat() without an axis would flatten it.
+        values = np.array(v, dtype=object)
+
+        if values.ndim > 1:
+            # Rebuild as a 1-D object array holding one list per record.
+            values = np.empty((len(v),), dtype=object)
+            for i, val in enumerate(v):
+                values[i] = val
+
+        result[k] = values.repeat(lengths)
     return result
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index 78181fe2c4729..316f262885424 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -170,6 +170,19 @@ def test_simple_normalize(self, state_data):
 
         tm.assert_frame_equal(result, expected)
 
+    def test_fields_list_type_normalize(self):
+        # GH 37782
+        data = [{"values": [1, 2, 3], "metadata": {"listdata": [1, 2]}}]
+        result = json_normalize(
+            data,
+            record_path=["values"],
+            meta=[["metadata", "listdata"]],
+        )
+        expected = DataFrame(
+            {0: [1, 2, 3], "metadata.listdata": [[1, 2], [1, 2], [1, 2]]}
+        )
+        tm.assert_frame_equal(result, expected)
+
     def test_empty_array(self):
         result = json_normalize([])
         expected = DataFrame()