From 38b6339e425cc158f936ddfb9cb6f3b690b2f759 Mon Sep 17 00:00:00 2001 From: Felipe Maion Date: Fri, 5 May 2023 08:03:41 -0300 Subject: [PATCH 1/8] Fix BUG: #37782 --- pandas/io/json/_normalize.py | 12 +++++++++++- pandas/tests/io/json/test_normalize.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 0937828b00e38..672b6527c1fe0 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -535,5 +535,15 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: raise ValueError( f"Conflicting metadata name {k}, need distinguishing prefix " ) - result[k] = np.array(v, dtype=object).repeat(lengths) + #### FIX BUG #37782: https://github.com/pandas-dev/pandas/issues/37782 + + values = np.array(v, dtype=object) + + if values.ndim > 1: + # GH#37782 + values = np.empty((len(v),), dtype=object) + for i, v in enumerate(v): + values[i] = v + + result[k] = values.repeat(lengths) return result diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 78181fe2c4729..b3c05ae69ac9d 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -137,6 +137,11 @@ def max_level_test_input_data(): ] +@pytest.fixture +def parse_metadata_fields_list_type(): + return [{"values": [1, 2, 3], "metadata": {"listdata": [1, 2]}}] + + class TestJSONNormalize: def test_simple_records(self): recs = [ @@ -170,6 +175,17 @@ def test_simple_normalize(self, state_data): tm.assert_frame_equal(result, expected) + def test_fields_list_type_normalize(self, parse_metadata_fields_list_type): + result = json_normalize( + parse_metadata_fields_list_type, + record_path=["values"], + meta=[["metadata", "listdata"]], + ) + expected = DataFrame( + {0: [1, 2, 3], "metadata.listdata": [[1, 2], [1, 2], [1, 2]]} + ) + tm.assert_frame_equal(result, expected) + def test_empty_array(self): result = json_normalize([]) expected = DataFrame() From ff1907df773e485409615a42d5c6b7a76d48941d Mon Sep 17 00:00:00 2001 From: Felipe Maion Date: Fri, 5 May 2023 08:12:58 -0300 Subject: [PATCH 2/8] Fix BUG: #37782 --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index da1b2e750392c..3a6b7f0ba22e9 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -372,10 +372,10 @@ I/O ^^^ - :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`) - :meth:`DataFrame.to_sql` now raising ``ValueError`` when the name param is left empty while using SQLAlchemy to connect (:issue:`52675`) +- Bug in :func:`json_normalize`, json_normalize cannot parse metadata fields list type (:issue:`#37782`) - Bug in :func:`read_hdf` not properly closing store after a ``IndexError`` is raised (:issue:`52781`) - Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`) - Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`) -- Period ^^^^^^ From 190d50c5577b8753d805756c7fc7b8468e1ef2c0 Mon Sep 17 00:00:00 2001 From: Felipe Maion Date: Fri, 5 May 2023 19:50:12 -0300 Subject: [PATCH 3/8] Fix BUG: 37782 - Hardcoded test data --- pandas/tests/io/json/test_normalize.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index b3c05ae69ac9d..316f262885424 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -137,11 +137,6 @@ def max_level_test_input_data(): ] -@pytest.fixture -def parse_metadata_fields_list_type(): - return [{"values": [1, 2, 3], "metadata": {"listdata": [1, 2]}}] - - class TestJSONNormalize: def test_simple_records(self): recs = [ @@ -175,7 +170,10 @@ def test_simple_normalize(self, state_data): tm.assert_frame_equal(result, expected) - def test_fields_list_type_normalize(self, parse_metadata_fields_list_type): + def test_fields_list_type_normalize(self): + parse_metadata_fields_list_type = [ + {"values": [1, 2, 3], "metadata": {"listdata": [1, 2]}} + ] result = json_normalize( parse_metadata_fields_list_type, record_path=["values"], From 30a754f123e09f83319892cf01ba9cde33027e54 Mon Sep 17 00:00:00 2001 From: Felipe Maion Date: Fri, 5 May 2023 19:52:21 -0300 Subject: [PATCH 4/8] Fix BUG: 37782 - Hardcoded test data --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3a6b7f0ba22e9..b42032199dadf 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -372,7 +372,7 @@ I/O ^^^ - :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`) - :meth:`DataFrame.to_sql` now raising ``ValueError`` when the name param is left empty while using SQLAlchemy to connect (:issue:`52675`) -- Bug in :func:`json_normalize`, json_normalize cannot parse metadata fields list type (:issue:`#37782`) +- Bug in :func:`json_normalize`, fix json_normalize cannot parse metadata fields list type (:issue:`37782`) - Bug in :func:`read_hdf` not properly closing store after a ``IndexError`` is raised (:issue:`52781`) - Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`) - Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`) From 600208c4ee652b8b43d27a1265ae3cc7cde47ae7 Mon Sep 17 00:00:00 2001 From: Felipe Maion Date: Fri, 5 May 2023 20:20:27 -0300 Subject: [PATCH 5/8] Update pandas/io/json/_normalize.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/io/json/_normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 672b6527c1fe0..6d6707fa73625 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -535,7 +535,7 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: raise ValueError( f"Conflicting metadata name {k}, need distinguishing prefix " ) - #### FIX BUG #37782: https://github.com/pandas-dev/pandas/issues/37782 + #GH 37782 values = np.array(v, dtype=object) From 9e25b989948b2b92f755d15ace8132d944265cb2 Mon Sep 17 00:00:00 2001 From: Felipe Maion Date: Fri, 5 May 2023 21:18:58 -0300 Subject: [PATCH 6/8] Fix BUG: 37782 - typo --- pandas/io/json/_normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 6d6707fa73625..24de14e7f98a1 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -540,7 +540,7 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: values = np.array(v, dtype=object) if values.ndim > 1: - # GH#37782 + # GH 37782 values = np.empty((len(v),), dtype=object) for i, v in enumerate(v): values[i] = v From df467472b353f707ed9620ba119d04dc612b1ef4 Mon Sep 17 00:00:00 2001 From: Felipe Maion Date: Fri, 5 May 2023 21:50:13 -0300 Subject: [PATCH 7/8] Fix BUG: 37782 - typo --- pandas/io/json/_normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 24de14e7f98a1..459b4035627cc 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -535,7 +535,7 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: raise ValueError( f"Conflicting metadata name {k}, need distinguishing prefix " ) - #GH 37782 + # GH 37782 values = np.array(v, dtype=object) From 47b6166cb63bb33b7c529173f9d18d1f83f44549 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 8 May 2023 09:44:17 -0700 Subject: [PATCH 8/8] Update doc/source/whatsnew/v2.1.0.rst --- doc/source/whatsnew/v2.1.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index dfc3480046281..8fac2f7737fc3 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -380,7 +380,6 @@ I/O - Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`) - Bug in displaying a :class:`MultiIndex` with a long element (:issue:`52960`) - Period ^^^^^^ - :meth:`PeriodIndex.map` with ``na_action="ignore"`` now works as expected (:issue:`51644`)