Skip to content

Commit a7f37d4

Browse files
committed
Merge pull request #3804 from jreback/ujson
ENH: add ujson support in pandas.io.json
2 parents e958833 + 8e4314d commit a7f37d4

25 files changed

+7083
-200
lines changed

LICENSES/ULTRAJSON_LICENSE

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom
2+
All rights reserved.
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are met:
6+
* Redistributions of source code must retain the above copyright
7+
notice, this list of conditions and the following disclaimer.
8+
* Redistributions in binary form must reproduce the above copyright
9+
notice, this list of conditions and the following disclaimer in the
10+
documentation and/or other materials provided with the distribution.
11+
* Neither the name of the ESN Social Software AB nor the
12+
names of its contributors may be used to endorse or promote products
13+
derived from this software without specific prior written permission.
14+
15+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18+
DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE
19+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
26+
27+
Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
28+
http://code.google.com/p/stringencoders/
29+
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.
30+
31+
Numeric decoder derived from from TCL library
32+
http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
33+
* Copyright (c) 1988-1993 The Regents of the University of California.
34+
* Copyright (c) 1994 Sun Microsystems, Inc.

doc/source/api.rst

+11
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,16 @@ Excel
4545
read_excel
4646
ExcelFile.parse
4747

48+
JSON
49+
~~~~
50+
51+
.. currentmodule:: pandas.io.json
52+
53+
.. autosummary::
54+
:toctree: generated/
55+
56+
read_json
57+
4858
HTML
4959
~~~~
5060

@@ -597,6 +607,7 @@ Serialization / IO / Conversion
597607
DataFrame.to_hdf
598608
DataFrame.to_dict
599609
DataFrame.to_excel
610+
DataFrame.to_json
600611
DataFrame.to_html
601612
DataFrame.to_stata
602613
DataFrame.to_records

doc/source/io.rst

+100-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ object.
3535
* ``read_excel``
3636
* ``read_hdf``
3737
* ``read_sql``
38+
* ``read_json``
3839
* ``read_html``
3940
* ``read_stata``
4041
* ``read_clipboard``
@@ -45,6 +46,7 @@ The corresponding ``writer`` functions are object methods that are accessed like
4546
* ``to_excel``
4647
* ``to_hdf``
4748
* ``to_sql``
49+
* ``to_json``
4850
* ``to_html``
4951
* ``to_stata``
5052
* ``to_clipboard``
@@ -937,6 +939,104 @@ The Series object also has a ``to_string`` method, but with only the ``buf``,
937939
which, if set to ``True``, will additionally output the length of the Series.
938940

939941

942+
JSON
943+
----
944+
945+
Read and write ``JSON`` format files.
946+
947+
.. _io.json:
948+
949+
Writing JSON
950+
~~~~~~~~~~~~
951+
952+
A ``Series`` or ``DataFrame`` can be converted to a valid JSON string. Use ``to_json``
953+
with optional parameters:
954+
955+
- path_or_buf : the pathname or buffer to write the output
956+
This can be ``None`` in which case a JSON string is returned
957+
- orient : The format of the JSON string, default is ``index`` for ``Series``, ``columns`` for ``DataFrame``
958+
959+
* split : dict like {index -> [index], columns -> [columns], data -> [values]}
960+
* records : list like [{column -> value}, ... , {column -> value}]
961+
* index : dict like {index -> {column -> value}}
962+
* columns : dict like {column -> {index -> value}}
963+
* values : just the values array
964+
965+
- date_format : type of date conversion (epoch = epoch milliseconds, iso = ISO8601), default is epoch
966+
- double_precision : The number of decimal places to use when encoding floating point values, default 10.
967+
- force_ascii : force encoded string to be ASCII, default True.
968+
969+
Note that NaN's and None will be converted to null, and datetime objects will be converted based on the ``date_format`` parameter.
970+
971+
.. ipython:: python
972+
973+
dfj = DataFrame(randn(5, 2), columns=list('AB'))
974+
json = dfj.to_json()
975+
json
976+
977+
Writing in iso date format
978+
979+
.. ipython:: python
980+
981+
dfd = DataFrame(randn(5, 2), columns=list('AB'))
982+
dfd['date'] = Timestamp('20130101')
983+
json = dfd.to_json(date_format='iso')
984+
json
985+
986+
Writing to a file, with a date index and a date column
987+
988+
.. ipython:: python
989+
990+
dfj2 = dfj.copy()
991+
dfj2['date'] = Timestamp('20130101')
992+
dfj2.index = date_range('20130101',periods=5)
993+
dfj2.to_json('test.json')
994+
open('test.json').read()
995+
996+
Reading JSON
997+
~~~~~~~~~~~~
998+
999+
Reading a JSON string into a pandas object can take a number of parameters.
1000+
The parser will try to parse a ``DataFrame`` if ``typ`` is not supplied or
1001+
is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series``
1002+
1003+
- filepath_or_buffer : a **VALID** JSON string or file handle / StringIO. The string could be
1004+
a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host
1005+
is expected. For instance, a local file could be
1006+
``file://localhost/path/to/table.json``
1007+
- typ : type of object to recover (series or frame), default 'frame'
1008+
- orient : The format of the JSON string, one of the following
1009+
1010+
* split : dict like {index -> [index], name -> name, data -> [values]}
1011+
* records : list like [value, ... , value]
1012+
* index : dict like {index -> value}
1013+
1014+
- dtype : dtype of the resulting object
1015+
- numpy : direct decoding to numpy arrays. default True but falls back to standard decoding if a problem occurs.
1016+
- parse_dates : a list of columns to parse for dates; If True, then try to parse datelike columns, default is False
1017+
- keep_default_dates : boolean, default True. If parsing dates, then parse the default datelike columns
1018+
1019+
The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is
1020+
not parsable.
1021+
1022+
Reading from a JSON string
1023+
1024+
.. ipython:: python
1025+
1026+
pd.read_json(json)
1027+
1028+
Reading from a file, parsing dates
1029+
1030+
.. ipython:: python
1031+
1032+
pd.read_json('test.json',parse_dates=True)
1033+
1034+
.. ipython:: python
1035+
:suppress:
1036+
1037+
import os
1038+
os.remove('test.json')
1039+
9401040
HTML
9411041
----
9421042

@@ -2193,7 +2293,6 @@ into a .dta file. The format version of this file is always the latest one, 115.
21932293
21942294
.. ipython:: python
21952295
2196-
from pandas.io.stata import StataWriter
21972296
df = DataFrame(randn(10, 2), columns=list('AB'))
21982297
df.to_stata('stata.dta')
21992298

doc/source/v0.11.1.txt

+6
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ API changes
1616
* ``read_excel``
1717
* ``read_hdf``
1818
* ``read_sql``
19+
* ``read_json``
1920
* ``read_html``
2021
* ``read_stata``
2122
* ``read_clipboard``
@@ -26,6 +27,7 @@ API changes
2627
* ``to_excel``
2728
* ``to_hdf``
2829
* ``to_sql``
30+
* ``to_json``
2931
* ``to_html``
3032
* ``to_stata``
3133
* ``to_clipboard``
@@ -175,6 +177,10 @@ Enhancements
175177
accessible via ``read_stata`` top-level function for reading,
176178
and ``to_stata`` DataFrame method for writing, :ref:`See the docs<io.stata>`
177179

180+
- Added module for reading and writing json format files: ``pandas.io.json``
181+
accessible via ``read_json`` top-level function for reading,
182+
and ``to_json`` DataFrame method for writing, :ref:`See the docs<io.json>`
183+
178184
- ``DataFrame.replace()`` now allows regular expressions on contained
179185
``Series`` with object dtype. See the examples section in the regular docs
180186
:ref:`Replacing via String Expression <missing_data.replace_expression>`

pandas/core/generic.py

+39
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,45 @@ def to_clipboard(self):
495495
from pandas.io import clipboard
496496
clipboard.to_clipboard(self)
497497

498+
def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
499+
double_precision=10, force_ascii=True):
500+
"""
501+
Convert the object to a JSON string.
502+
503+
Note NaN's and None will be converted to null and datetime objects
504+
will be converted based on the date_format parameter.
505+
506+
Parameters
507+
----------
508+
path_or_buf : the path or buffer to write the result string
509+
if this is None, return the converted JSON string
510+
orient : {'split', 'records', 'index', 'columns', 'values'},
511+
default is 'index' for Series, 'columns' for DataFrame
512+
513+
The format of the JSON string
514+
split : dict like
515+
{index -> [index], columns -> [columns], data -> [values]}
516+
records : list like [{column -> value}, ... , {column -> value}]
517+
index : dict like {index -> {column -> value}}
518+
columns : dict like {column -> {index -> value}}
519+
values : just the values array
520+
date_format : type of date conversion (epoch = epoch milliseconds, iso = ISO8601),
521+
default is epoch
522+
double_precision : The number of decimal places to use when encoding
523+
floating point values, default 10.
524+
force_ascii : force encoded string to be ASCII, default True.
525+
526+
Returns
527+
-------
528+
result : a JSON compatible string written to the path_or_buf;
529+
if the path_or_buf is None, return the result as a JSON string
530+
531+
"""
532+
533+
from pandas.io import json
534+
return json.to_json(path_or_buf=path_or_buf, obj=self, orient=orient, date_format=date_format,
535+
double_precision=double_precision, force_ascii=force_ascii)
536+
498537
# install the indexers
499538
for _name, _indexer in indexing.get_indexers_list():
500539
PandasObject._create_indexer(_name,_indexer)

pandas/io/api.py

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pandas.io.clipboard import read_clipboard
77
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel
88
from pandas.io.pytables import HDFStore, Term, get_store, read_hdf
9+
from pandas.io.json import read_json
910
from pandas.io.html import read_html
1011
from pandas.io.sql import read_sql
1112
from pandas.io.stata import read_stata

pandas/io/common.py

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import urlparse
44
from pandas.util import py3compat
5+
from StringIO import StringIO
56

67
_VALID_URLS = set(urlparse.uses_relative + urlparse.uses_netloc +
78
urlparse.uses_params)

pandas/io/excel.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from pandas.io.parsers import TextParser
1313
from pandas.tseries.period import Period
14-
import json
14+
from pandas import json
1515

1616
def read_excel(path_or_buf, sheetname, kind=None, **kwds):
1717
"""Read an Excel table into a pandas DataFrame

0 commit comments

Comments
 (0)