
DOC: Fix docs for io/json/* #41284

Merged · 3 commits · May 4, 2021
1 change: 1 addition & 0 deletions ci/code_checks.sh
@@ -140,6 +140,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
pandas/core/window/ \
pandas/errors/ \
pandas/io/clipboard/ \
pandas/io/json/ \
pandas/io/excel/ \
pandas/io/parsers/ \
pandas/io/sas/ \
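With pandas/io/json/ added to this list, the doctests check now runs the docstring examples in those modules on CI, which is why the expected outputs in the files below are reworked to match what the code actually prints. A rough local equivalent, as a sketch only — it uses the standard-library doctest module on one of the touched modules rather than the project's actual CI invocation:

    import doctest

    import pandas as pd
    import pandas.io.json._table_schema as table_schema

    # extraglobs injects `pd`, which the documented examples use but do not
    # import themselves; the project's test setup normally provides it.
    results = doctest.testmod(table_schema, extraglobs={"pd": pd})
    print(f"{results.attempted} examples run, {results.failed} failed")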
30 changes: 20 additions & 10 deletions pandas/io/json/_json.py
@@ -526,9 +526,13 @@ def read_json(
Encoding/decoding a Dataframe using ``'split'`` formatted JSON:

>>> df.to_json(orient='split')
'{{"columns":["col 1","col 2"],
"index":["row 1","row 2"],
"data":[["a","b"],["c","d"]]}}'
'\
{{\
"columns":["col 1","col 2"],\
"index":["row 1","row 2"],\
"data":[["a","b"],["c","d"]]\
}}\
'
>>> pd.read_json(_, orient='split')
col 1 col 2
row 1 a b
@@ -538,6 +542,7 @@ def read_json(

>>> df.to_json(orient='index')
'{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}'

>>> pd.read_json(_, orient='index')
col 1 col 2
row 1 a b
@@ -556,13 +561,18 @@ def read_json(
Encoding with Table Schema

>>> df.to_json(orient='table')
'{{"schema": {{"fields": [{{"name": "index", "type": "string"}},
{{"name": "col 1", "type": "string"}},
{{"name": "col 2", "type": "string"}}],
"primaryKey": "index",
"pandas_version": "0.20.0"}},
"data": [{{"index": "row 1", "col 1": "a", "col 2": "b"}},
{{"index": "row 2", "col 1": "c", "col 2": "d"}}]}}'
'\
{{"schema":{{"fields":[\
{{"name":"index","type":"string"}},\
{{"name":"col 1","type":"string"}},\
{{"name":"col 2","type":"string"}}],\
"primaryKey":["index"],\
"pandas_version":"0.20.0"}},\
"data":[\
{{"index":"row 1","col 1":"a","col 2":"b"}},\
{{"index":"row 2","col 1":"c","col 2":"d"}}]\
}}\
'
"""
if orient == "table" and dtype:
raise ValueError("cannot pass both dtype and orient='table'")
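The expected outputs above are now written with trailing backslashes so the docstring joins them into a single logical line, matching the one-line string that to_json actually returns; the doubled {{ }} are brace escapes for the docstring's format-style templating, not part of the JSON. A plain, runnable version of the 'split' round-trip — the df construction is an assumption, reconstructed from the outputs shown, since it is defined earlier in the docstring outside this hunk:

    from io import StringIO

    import pandas as pd

    # The `df` the examples assume, rebuilt from the outputs shown above.
    df = pd.DataFrame(
        [["a", "b"], ["c", "d"]],
        index=["row 1", "row 2"],
        columns=["col 1", "col 2"],
    )

    payload = df.to_json(orient="split")
    print(payload)  # one long line, which the backslash-joined expected output spells out
    print(pd.read_json(StringIO(payload), orient="split"))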
136 changes: 83 additions & 53 deletions pandas/io/json/_normalize.py
@@ -70,15 +70,17 @@ def nested_to_record(

Examples
--------
IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2),
nested=dict(e=dict(c=1,d=2),d=2)))
Out[52]:
{'dict1.c': 1,
'dict1.d': 2,
'flat1': 1,
'nested.d': 2,
'nested.e.c': 1,
'nested.e.d': 2}
>>> nested_to_record(
... dict(flat1=1, dict1=dict(c=1, d=2), nested=dict(e=dict(c=1, d=2), d=2))
... )
{\
'flat1': 1, \
'dict1.c': 1, \
'dict1.d': 2, \
'nested.e.c': 1, \
'nested.e.d': 2, \
'nested.d': 2\
}
"""
singleton = False
if isinstance(ds, dict):
@@ -208,18 +210,21 @@ def _simple_json_normalize(

Examples
--------
IN[52]: _simple_json_normalize({
'flat1': 1,
'dict1': {'c': 1, 'd': 2},
'nested': {'e': {'c': 1, 'd': 2}, 'd': 2}
})
Out[52]:
{'dict1.c': 1,
'dict1.d': 2,
'flat1': 1,
'nested.d': 2,
'nested.e.c': 1,
'nested.e.d': 2}
>>> _simple_json_normalize(
... {
... "flat1": 1,
... "dict1": {"c": 1, "d": 2},
... "nested": {"e": {"c": 1, "d": 2}, "d": 2},
... }
... )
{\
'flat1': 1, \
'dict1.c': 1, \
'dict1.d': 2, \
'nested.e.c': 1, \
'nested.e.d': 2, \
'nested.d': 2\
}

"""
normalised_json_object = {}
@@ -283,22 +288,30 @@ def _json_normalize(

Examples
--------
>>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
... {'name': {'given': 'Mark', 'family': 'Regner'}},
... {'id': 2, 'name': 'Faye Raker'}]
>>> data = [
... {"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
... {"name": {"given": "Mark", "family": "Regner"}},
... {"id": 2, "name": "Faye Raker"},
... ]
>>> pd.json_normalize(data)
id name.first name.last name.given name.family name
0 1.0 Coleen Volk NaN NaN NaN
1 NaN NaN NaN Mark Regner NaN
2 2.0 NaN NaN NaN NaN Faye Raker

>>> data = [{'id': 1,
... 'name': "Cole Volk",
... 'fitness': {'height': 130, 'weight': 60}},
... {'name': "Mark Reg",
... 'fitness': {'height': 130, 'weight': 60}},
... {'id': 2, 'name': 'Faye Raker',
... 'fitness': {'height': 130, 'weight': 60}}]
>>> data = [
... {
... "id": 1,
... "name": "Cole Volk",
... "fitness": {"height": 130, "weight": 60},
... },
... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
... {
... "id": 2,
... "name": "Faye Raker",
... "fitness": {"height": 130, "weight": 60},
... },
... ]
>>> pd.json_normalize(data, max_level=0)
id name fitness
0 1.0 Cole Volk {'height': 130, 'weight': 60}
@@ -307,32 +320,49 @@ def _json_normalize(

Normalizes nested data up to level 1.

>>> data = [{'id': 1,
... 'name': "Cole Volk",
... 'fitness': {'height': 130, 'weight': 60}},
... {'name': "Mark Reg",
... 'fitness': {'height': 130, 'weight': 60}},
... {'id': 2, 'name': 'Faye Raker',
... 'fitness': {'height': 130, 'weight': 60}}]
>>> data = [
... {
... "id": 1,
... "name": "Cole Volk",
... "fitness": {"height": 130, "weight": 60},
... },
... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
... {
... "id": 2,
... "name": "Faye Raker",
... "fitness": {"height": 130, "weight": 60},
... },
... ]
>>> pd.json_normalize(data, max_level=1)
id name fitness.height fitness.weight
0 1.0 Cole Volk 130 60
1 NaN Mark Reg 130 60
2 2.0 Faye Raker 130 60

>>> data = [{'state': 'Florida',
... 'shortname': 'FL',
... 'info': {'governor': 'Rick Scott'},
... 'counties': [{'name': 'Dade', 'population': 12345},
... {'name': 'Broward', 'population': 40000},
... {'name': 'Palm Beach', 'population': 60000}]},
... {'state': 'Ohio',
... 'shortname': 'OH',
... 'info': {'governor': 'John Kasich'},
... 'counties': [{'name': 'Summit', 'population': 1234},
... {'name': 'Cuyahoga', 'population': 1337}]}]
>>> result = pd.json_normalize(data, 'counties', ['state', 'shortname',
... ['info', 'governor']])
>>> data = [
... {
... "state": "Florida",
... "shortname": "FL",
... "info": {"governor": "Rick Scott"},
... "counties": [
... {"name": "Dade", "population": 12345},
... {"name": "Broward", "population": 40000},
... {"name": "Palm Beach", "population": 60000},
... ],
... },
... {
... "state": "Ohio",
... "shortname": "OH",
... "info": {"governor": "John Kasich"},
... "counties": [
... {"name": "Summit", "population": 1234},
... {"name": "Cuyahoga", "population": 1337},
... ],
... },
... ]
>>> result = pd.json_normalize(
... data, "counties", ["state", "shortname", ["info", "governor"]]
... )
>>> result
name population state shortname info.governor
0 Dade 12345 Florida FL Rick Scott
@@ -341,8 +371,8 @@ def _json_normalize(
3 Summit 1234 Ohio OH John Kasich
4 Cuyahoga 1337 Ohio OH John Kasich

>>> data = {'A': [1, 2]}
>>> pd.json_normalize(data, 'A', record_prefix='Prefix.')
>>> data = {"A": [1, 2]}
>>> pd.json_normalize(data, "A", record_prefix="Prefix.")
Prefix.0
0 1
1 2
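The examples above cover record_path, meta, max_level and record_prefix. As a small companion sketch only — it uses the public pd.json_normalize and its sep parameter, not the private helpers edited here — nested keys can also be joined with a custom separator:

    import pandas as pd

    data = [{"id": 1, "fitness": {"height": 130, "weight": 60}}]

    # sep controls how nested keys are joined into flattened column names.
    flat = pd.json_normalize(data, sep="_")
    print(list(flat.columns))  # ['id', 'fitness_height', 'fitness_weight']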
43 changes: 24 additions & 19 deletions pandas/io/json/_table_schema.py
@@ -155,21 +155,25 @@ def convert_json_field_to_pandas_type(field):

Examples
--------
>>> convert_json_field_to_pandas_type({'name': 'an_int',
'type': 'integer'})
>>> convert_json_field_to_pandas_type({"name": "an_int", "type": "integer"})
'int64'
>>> convert_json_field_to_pandas_type({'name': 'a_categorical',
'type': 'any',
'constraints': {'enum': [
'a', 'b', 'c']},
'ordered': True})
'CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)'
>>> convert_json_field_to_pandas_type({'name': 'a_datetime',
'type': 'datetime'})

>>> convert_json_field_to_pandas_type(
... {
... "name": "a_categorical",
... "type": "any",
... "constraints": {"enum": ["a", "b", "c"]},
... "ordered": True,
... }
... )
CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)

>>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"})
'datetime64[ns]'
>>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz',
'type': 'datetime',
'tz': 'US/Central'})

>>> convert_json_field_to_pandas_type(
... {"name": "a_datetime_with_tz", "type": "datetime", "tz": "US/Central"}
... )
'datetime64[ns, US/Central]'
"""
typ = field["type"]
@@ -245,12 +249,13 @@ def build_table_schema(
... 'C': pd.date_range('2016-01-01', freq='d', periods=3),
... }, index=pd.Index(range(3), name='idx'))
>>> build_table_schema(df)
{'fields': [{'name': 'idx', 'type': 'integer'},
{'name': 'A', 'type': 'integer'},
{'name': 'B', 'type': 'string'},
{'name': 'C', 'type': 'datetime'}],
'pandas_version': '0.20.0',
'primaryKey': ['idx']}
{'fields': \
[{'name': 'idx', 'type': 'integer'}, \
{'name': 'A', 'type': 'integer'}, \
{'name': 'B', 'type': 'string'}, \
{'name': 'C', 'type': 'datetime'}], \
'primaryKey': ['idx'], \
'pandas_version': '0.20.0'}
"""
if index is True:
data = set_default_names(data)
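The schema shown in the build_table_schema example is the same structure that DataFrame.to_json(orient='table') embeds and read_json(orient='table') consumes. A short round-trip sketch, assuming nothing beyond that public API:

    from io import StringIO

    import pandas as pd

    df = pd.DataFrame(
        {"A": [1, 2, 3], "B": ["a", "b", "c"]},
        index=pd.Index(range(3), name="idx"),
    )

    as_table = df.to_json(orient="table")  # embeds the table schema built above
    restored = pd.read_json(StringIO(as_table), orient="table")
    print(restored.dtypes)  # dtypes and the named index are recovered from the schema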