Skip to content

Commit 81e3e0f

Browse files
authored
ENH: Preserve Series index on json_normalize (#57422)
* Preserve index on json_normalize
* Update unit tests
* Update release notes
* Pass linter
* Set index in constructor
* Use tm assert_index_equal in unit test
* Update docstring and examples
* Update release notes
1 parent 6d999c0 commit 81e3e0f

File tree

3 files changed

+46
-6
lines changed

3 files changed

+46
-6
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Other enhancements
3131
- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
3232
- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
3333
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
34+
- Support passing a :class:`Series` input to :func:`json_normalize`; the resulting :class:`DataFrame` retains the :class:`Index` of the :class:`Series` (:issue:`51452`)
3435
-
3536

3637
.. ---------------------------------------------------------------------------

pandas/io/json/_normalize.py

+35-5
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
from pandas._libs.writers import convert_json_to_lines
2020

2121
import pandas as pd
22-
from pandas import DataFrame
22+
from pandas import (
23+
DataFrame,
24+
Series,
25+
)
2326

2427
if TYPE_CHECKING:
2528
from collections.abc import Iterable
@@ -266,7 +269,7 @@ def _simple_json_normalize(
266269

267270

268271
def json_normalize(
269-
data: dict | list[dict],
272+
data: dict | list[dict] | Series,
270273
record_path: str | list | None = None,
271274
meta: str | list[str | list[str]] | None = None,
272275
meta_prefix: str | None = None,
@@ -280,7 +283,7 @@ def json_normalize(
280283
281284
Parameters
282285
----------
283-
data : dict or list of dicts
286+
data : dict, list of dicts, or Series of dicts
284287
Unserialized JSON objects.
285288
record_path : str or list of str, default None
286289
Path in each object to list of records. If not passed, data will be
@@ -365,6 +368,26 @@ def json_normalize(
365368
1 NaN Mark Reg 130 60
366369
2 2.0 Faye Raker 130 60
367370
371+
>>> data = [
372+
... {
373+
... "id": 1,
374+
... "name": "Cole Volk",
375+
... "fitness": {"height": 130, "weight": 60},
376+
... },
377+
... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
378+
... {
379+
... "id": 2,
380+
... "name": "Faye Raker",
381+
... "fitness": {"height": 130, "weight": 60},
382+
... },
383+
... ]
384+
>>> series = pd.Series(data, index=pd.Index(["a", "b", "c"]))
385+
>>> pd.json_normalize(series)
386+
id name fitness.height fitness.weight
387+
a 1.0 Cole Volk 130 60
388+
b NaN Mark Reg 130 60
389+
c 2.0 Faye Raker 130 60
390+
368391
>>> data = [
369392
... {
370393
... "state": "Florida",
@@ -455,6 +478,11 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
455478
)
456479
return result
457480

481+
if isinstance(data, Series):
482+
index = data.index
483+
else:
484+
index = None
485+
458486
if isinstance(data, list) and not data:
459487
return DataFrame()
460488
elif isinstance(data, dict):
@@ -477,7 +505,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
477505
and record_prefix is None
478506
and max_level is None
479507
):
480-
return DataFrame(_simple_json_normalize(data, sep=sep))
508+
return DataFrame(_simple_json_normalize(data, sep=sep), index=index)
481509

482510
if record_path is None:
483511
if any([isinstance(x, dict) for x in y.values()] for y in data):
@@ -489,7 +517,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
489517
# TODO: handle record values which are lists, at least error
490518
# reasonably
491519
data = nested_to_record(data, sep=sep, max_level=max_level)
492-
return DataFrame(data)
520+
return DataFrame(data, index=index)
493521
elif not isinstance(record_path, list):
494522
record_path = [record_path]
495523

@@ -564,4 +592,6 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None:
564592
values[i] = val
565593

566594
result[k] = values.repeat(lengths)
595+
if index is not None:
596+
result.index = index.repeat(lengths)
567597
return result

pandas/tests/io/json/test_normalize.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,14 @@ def test_top_column_with_leading_underscore(self):
561561

562562
tm.assert_frame_equal(result, expected)
563563

564+
def test_series_index(self, state_data):
565+
idx = Index([7, 8])
566+
series = Series(state_data, index=idx)
567+
result = json_normalize(series)
568+
tm.assert_index_equal(result.index, idx)
569+
result = json_normalize(series, "counties")
570+
tm.assert_index_equal(result.index, idx.repeat([3, 2]))
571+
564572

565573
class TestNestedToRecord:
566574
def test_flat_stays_flat(self):
@@ -891,6 +899,7 @@ def test_series_non_zero_index(self):
891899
"elements.a": [1.0, np.nan, np.nan],
892900
"elements.b": [np.nan, 2.0, np.nan],
893901
"elements.c": [np.nan, np.nan, 3.0],
894-
}
902+
},
903+
index=[1, 2, 3],
895904
)
896905
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)