From 94fc4566fab236853e877c437779adcae9e21f2a Mon Sep 17 00:00:00 2001 From: Chris Warth Date: Thu, 22 Sep 2016 18:27:22 -0700 Subject: [PATCH 1/5] DOC: Expand on reference docs for panda.read_json() --- pandas/io/json.py | 53 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/pandas/io/json.py b/pandas/io/json.py index 1e258101a5d86..b3d7f33d94210 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -123,32 +123,39 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, file. For file URLs, a host is expected. For instance, a local file could be ``file://localhost/path/to/table.json`` - orient + orient : string, indicating the expected format of the JSON input. + The set of allowed orients changes depending on the value + of the ``typ`` parameter. - * `Series` + * when ``typ == 'series'``, + - allowed orients are ``{'split','records','index'}`` - default is ``'index'`` - - allowed values are: ``{'split','records','index'}`` - The Series index must be unique for orient ``'index'``. - * `DataFrame` + * when ``typ == 'frame'``, + - allowed orients are ``{'split','records','index', + 'columns','values'}`` - default is ``'columns'`` - - allowed values are: {'split','records','index','columns','values'} - The DataFrame index must be unique for orients 'index' and 'columns'. - The DataFrame columns must be unique for orients 'index', 'columns', and 'records'. - * The format of the JSON string - - split : dict like + The value of ``orient`` specifies the expected format of the + JSON string. The expected JSON formats are compatible with the + strings produced by ``to_json()`` with a corresponding value + of ``orient``. + + - ``'split'`` : dict like ``{index -> [index], columns -> [columns], data -> [values]}`` - - records : list like + - ``'records'`` : list like ``[{column -> value}, ... 
, {column -> value}]`` - - index : dict like ``{index -> {column -> value}}`` - - columns : dict like ``{column -> {index -> value}}`` - - values : just the values array + - ``'index'`` : dict like ``{index -> {column -> value}}`` + - ``'columns'`` : dict like ``{column -> {index -> value}}`` + - ``'values'`` : just the values array typ : type of object to recover (series or frame), default 'frame' dtype : boolean or dict, default True @@ -197,7 +204,29 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, Returns ------- - result : Series or DataFrame + result : Series or DataFrame, depending on the value of ``typ``. + + Examples + -------- + + >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], + index=['row 1', 'row 2'], + columns=['col 1', 'col 2']) + >>> print df + col 1 col 2 + row 1 a b + row 2 c d + >>> for orient in ['split', 'records', 'index']: + str = df.to_json(orient=orient) + print "'{}': '{}'".format(orient, str) + pd.read_json(str, orient=orient) + 'split': + '{"columns":["col 1","col 2"],"index":["row 1","row 2"],"data":[["a","b"], + ["c","d"]]}' + 'records': + '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' + 'index': + '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' """ filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf, From 943baeea31d679e9b4dc3bc460013fc2585b20d1 Mon Sep 17 00:00:00 2001 From: Chris Warth Date: Mon, 26 Sep 2016 11:28:44 -0700 Subject: [PATCH 2/5] address comments from @jorisvandenbossche --- pandas/io/json.py | 70 ++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/pandas/io/json.py b/pandas/io/json.py index b3d7f33d94210..25b17f54dce41 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -122,10 +122,12 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, The string could be a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is expected. 
For instance, a local file could be ``file://localhost/path/to/table.json`` + meta_prefix : string, default None - orient : string, indicating the expected format of the JSON input. + orient : string, + Indication of expected JSON input format. The set of allowed orients changes depending on the value - of the ``typ`` parameter. + of the `typ` parameter. * when ``typ == 'series'``, @@ -138,24 +140,24 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, - allowed orients are ``{'split','records','index', 'columns','values'}`` - default is ``'columns'`` - - The DataFrame index must be unique for orients 'index' and - 'columns'. - - The DataFrame columns must be unique for orients 'index', - 'columns', and 'records'. + - The DataFrame index must be unique for orients ``'index'`` and + ``'columns'``. + - The DataFrame columns must be unique for orients ``'index'``, + ``'columns'``, and ``'records'``. - The value of ``orient`` specifies the expected format of the + The value of `orient` specifies the expected format of the JSON string. The expected JSON formats are compatible with the strings produced by ``to_json()`` with a corresponding value - of ``orient``. + of `orient`. - - ``'split'`` : dict like - ``{index -> [index], columns -> [columns], data -> [values]}`` - - ``'records'`` : list like - ``[{column -> value}, ... , {column -> value}]`` - - ``'index'`` : dict like ``{index -> {column -> value}}`` - - ``'columns'`` : dict like ``{column -> {index -> value}}`` - - ``'values'`` : just the values array + - ``'split'`` : dict like + ``{index -> [index], columns -> [columns], data -> [values]}`` + - ``'records'`` : list like + ``[{column -> value}, ... 
, {column -> value}]`` + - ``'index'`` : dict like ``{index -> {column -> value}}`` + - ``'columns'`` : dict like ``{column -> {index -> value}}`` + - ``'values'`` : just the values array typ : type of object to recover (series or frame), default 'frame' dtype : boolean or dict, default True @@ -204,29 +206,41 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, Returns ------- - result : Series or DataFrame, depending on the value of ``typ``. + result : Series or DataFrame, depending on the value of `typ`. Examples -------- >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], - index=['row 1', 'row 2'], - columns=['col 1', 'col 2']) - >>> print df + ... index=['row 1', 'row 2'], + ... columns=['col 1', 'col 2']) + + >>> df.to_json(orient='split') + '{"columns":["col 1","col 2"], + "index":["row 1","row 2"], + "data":[["a","b"],["c","d"]]}' + + >>> pd.read_json(_, orient='split') col 1 col 2 row 1 a b row 2 c d - >>> for orient in ['split', 'records', 'index']: - str = df.to_json(orient=orient) - print "'{}': '{}'".format(orient, str) - pd.read_json(str, orient=orient) - 'split': - '{"columns":["col 1","col 2"],"index":["row 1","row 2"],"data":[["a","b"], - ["c","d"]]}' - 'records': + + >>> df.to_json(orient='records') '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' - 'index': + + >>> pd.read_json(_, orient='records') + col 1 col 2 + 0 a b + 1 c d + + >>> df.to_json(orient='index') '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' + + >>> pd.read_json(_, orient='index') + col 1 col 2 + row 1 a b + row 2 c d + """ filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf, From bdc25b730ffbd01514d4f8b62774b437e6fa3098 Mon Sep 17 00:00:00 2001 From: Chris Warth Date: Fri, 14 Oct 2016 15:10:45 -0700 Subject: [PATCH 3/5] addressing comments from jorisvandenbossche --- pandas/io/json.py | 52 +++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/io/json.py 
b/pandas/io/json.py index 25b17f54dce41..8f754227508a5 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -122,11 +122,22 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, The string could be a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is expected. For instance, a local file could be ``file://localhost/path/to/table.json`` - meta_prefix : string, default None orient : string, - Indication of expected JSON input format. - The set of allowed orients changes depending on the value + Indication of expected JSON string format. + Compatible JSON strings can be produced by ``to_json()`` with a + corresponding orient value. + The set of possible orients is: + + - ``'split'`` : dict like + ``{index -> [index], columns -> [columns], data -> [values]}`` + - ``'records'`` : list like + ``[{column -> value}, ... , {column -> value}]`` + - ``'index'`` : dict like ``{index -> {column -> value}}`` + - ``'columns'`` : dict like ``{column -> {index -> value}}`` + - ``'values'`` : just the values array + + The allowed and default values depend on the value of the `typ` parameter. * when ``typ == 'series'``, @@ -145,20 +156,6 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, - The DataFrame columns must be unique for orients ``'index'``, ``'columns'``, and ``'records'``. - - The value of `orient` specifies the expected format of the - JSON string. The expected JSON formats are compatible with the - strings produced by ``to_json()`` with a corresponding value - of `orient`. - - - ``'split'`` : dict like - ``{index -> [index], columns -> [columns], data -> [values]}`` - - ``'records'`` : list like - ``[{column -> value}, ... 
, {column -> value}]`` - ``'index'`` : dict like ``{index -> {column -> value}}`` - ``'columns'`` : dict like ``{column -> {index -> value}}`` - ``'values'`` : just the values array - typ : type of object to recover (series or frame), default 'frame' dtype : boolean or dict, default True If True, infer dtypes, if a dict of column to dtype, then use those, @@ -215,32 +212,35 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, ... index=['row 1', 'row 2'], ... columns=['col 1', 'col 2']) + Encoding/decoding a DataFrame using ``'split'`` formatted JSON: + >>> df.to_json(orient='split') '{"columns":["col 1","col 2"], "index":["row 1","row 2"], "data":[["a","b"],["c","d"]]}' - >>> pd.read_json(_, orient='split') col 1 col 2 row 1 a b row 2 c d - >>> df.to_json(orient='records') - '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' - - >>> pd.read_json(_, orient='records') - col 1 col 2 - 0 a b - 1 c d + Encoding/decoding a DataFrame using ``'index'`` formatted JSON: >>> df.to_json(orient='index') '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' - >>> pd.read_json(_, orient='index') col 1 col 2 row 1 a b row 2 c d + Encoding/decoding a DataFrame using ``'records'`` formatted JSON. + Note that index labels are not preserved with this encoding. 
+ + >>> df.to_json(orient='records') + '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' + >>> pd.read_json(_, orient='records') + col 1 col 2 + 0 a b + 1 c d """ filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf, From 32c045cf771d32507cfca1692778215e08a1f85f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 27 Oct 2016 15:45:29 +0200 Subject: [PATCH 4/5] add see also to to_json --- pandas/io/json.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/io/json.py b/pandas/io/json.py index 8f754227508a5..1e8bfbb3f7255 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -205,6 +205,10 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, ------- result : Series or DataFrame, depending on the value of `typ`. + See Also + -------- + DataFrame.to_json + Examples -------- From 5e34a02ae37c499b2e4bb8701da2a6fca7ff019e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 27 Oct 2016 15:48:43 +0200 Subject: [PATCH 5/5] fix indentation --- pandas/io/json.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/io/json.py b/pandas/io/json.py index 1e8bfbb3f7255..878506a6ddc05 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -129,13 +129,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, corresponding orient value. The set of possible orients is: - - ``'split'`` : dict like - ``{index -> [index], columns -> [columns], data -> [values]}`` - - ``'records'`` : list like - ``[{column -> value}, ... , {column -> value}]`` - - ``'index'`` : dict like ``{index -> {column -> value}}`` - - ``'columns'`` : dict like ``{column -> {index -> value}}`` - - ``'values'`` : just the values array + - ``'split'`` : dict like + ``{index -> [index], columns -> [columns], data -> [values]}`` + - ``'records'`` : list like + ``[{column -> value}, ... 
, {column -> value}]`` + - ``'index'`` : dict like ``{index -> {column -> value}}`` + - ``'columns'`` : dict like ``{column -> {index -> value}}`` + - ``'values'`` : just the values array The allowed and default values depend on the value of the `typ` parameter.