Skip to content

Commit 78e453e

Browse files
committed
Merge branch 'Komnomnomnom-ujson-precise-float'
2 parents 0d6c0f0 + 4e72d46 commit 78e453e

File tree

5 files changed

+64
-27
lines changed

5 files changed

+64
-27
lines changed

doc/source/io.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1060,6 +1060,8 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series``
10601060
- ``keep_default_dates`` : boolean, default True. If parsing dates, then parse the default datelike columns
10611061
- ``numpy`` : direct decoding to numpy arrays. default is False;
10621062
Note that the JSON ordering **MUST** be the same for each term if ``numpy=True``
1063+
- ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function
1064+
when decoding string to double values. Default (``False``) is to use fast but less precise builtin functionality
10631065

10641066
The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is
10651067
not parsable.

doc/source/release.rst

+3
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ pandas 0.12
3535
list of ``DataFrame`` s courtesy of @cpcloud. (:issue:`3477`,
3636
:issue:`3605`, :issue:`3606`)
3737
- Support for reading Amazon S3 files. (:issue:`3504`)
38+
- Added module for reading and writing JSON strings/files: pandas.io.json
39+
includes ``to_json`` DataFrame/Series method, and a ``read_json`` top-level reader
40+
various issues (:issue:`1226`, :issue:`3804`, :issue:`3876`, :issue:`3867`, :issue:`1305`)
3841
- Added module for reading and writing Stata files: pandas.io.stata (:issue:`1512`)
3942
includes ``to_stata`` DataFrame method, and a ``read_stata`` top-level reader
4043
- Added support for writing in ``to_csv`` and reading in ``read_csv``,

doc/source/v0.12.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ I/O Enhancements
206206
- Added module for reading and writing json format files: ``pandas.io.json``
207207
accessible via ``read_json`` top-level function for reading,
208208
and ``to_json`` DataFrame method for writing, :ref:`See the docs<io.json>`
209+
various issues (:issue:`1226`, :issue:`3804`, :issue:`3876`, :issue:`3867`, :issue:`1305`)
209210

210211
- ``MultiIndex`` column support for reading and writing csv format files
211212

pandas/io/json.py

+48-27
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
### interface to/from ###
1717

1818
def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision=10, force_ascii=True):
19-
19+
2020
if isinstance(obj, Series):
21-
s = SeriesWriter(obj, orient=orient, date_format=date_format, double_precision=double_precision,
21+
s = SeriesWriter(obj, orient=orient, date_format=date_format, double_precision=double_precision,
2222
ensure_ascii=force_ascii).write()
2323
elif isinstance(obj, DataFrame):
2424
s = FrameWriter(obj, orient=orient, date_format=date_format, double_precision=double_precision,
@@ -41,7 +41,7 @@ def __init__(self, obj, orient, date_format, double_precision, ensure_ascii):
4141

4242
if orient is None:
4343
orient = self._default_orient
44-
44+
4545
self.orient = orient
4646
self.date_format = date_format
4747
self.double_precision = double_precision
@@ -64,7 +64,7 @@ def _format_to_date(self, data):
6464
if self._needs_to_date(data):
6565
return data.apply(lambda x: x.isoformat())
6666
return data
67-
67+
6868
def copy_if_needed(self):
6969
""" copy myself if necessary """
7070
if not self.is_copy:
@@ -119,7 +119,8 @@ def _format_dates(self):
119119
self.obj[c] = self._format_to_date(self.obj[c])
120120

121121
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
122-
convert_axes=True, convert_dates=True, keep_default_dates=True, numpy=False):
122+
convert_axes=True, convert_dates=True, keep_default_dates=True,
123+
numpy=False, precise_float=False):
123124
"""
124125
Convert JSON string to pandas object
125126
@@ -154,8 +155,10 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
154155
default is True
155156
keep_default_dates : boolean, default True. If parsing dates,
156157
then parse the default datelike columns
157-
numpy: direct decoding to numpy arrays. default is False. Note that the JSON ordering MUST be the same
158+
numpy : direct decoding to numpy arrays. default is False. Note that the JSON ordering MUST be the same
158159
for each term if numpy=True.
160+
precise_float : boolean, default False. Set to enable usage of higher precision (strtod) function
161+
when decoding string to double values. Default (False) is to use fast but less precise builtin functionality
159162
160163
Returns
161164
-------
@@ -186,28 +189,31 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
186189
return obj
187190

188191
class Parser(object):
189-
190-
def __init__(self, json, orient, dtype=True, convert_axes=True, convert_dates=True, keep_default_dates=False, numpy=False):
192+
193+
def __init__(self, json, orient, dtype=True, convert_axes=True,
194+
convert_dates=True, keep_default_dates=False, numpy=False,
195+
precise_float=False):
191196
self.json = json
192197

193198
if orient is None:
194199
orient = self._default_orient
195-
200+
196201
self.orient = orient
197202
self.dtype = dtype
198203

199204
if orient == "split":
200205
numpy = False
201206

202207
self.numpy = numpy
208+
self.precise_float = precise_float
203209
self.convert_axes = convert_axes
204210
self.convert_dates = convert_dates
205211
self.keep_default_dates = keep_default_dates
206212
self.obj = None
207213

208214
def parse(self):
209215

210-
# try numpy
216+
# try numpy
211217
numpy = self.numpy
212218
if numpy:
213219
self._parse_numpy()
@@ -269,7 +275,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
269275
pass
270276

271277
if data.dtype == 'float':
272-
278+
273279
# coerce floats to 64
274280
try:
275281
data = data.astype('float64')
@@ -291,7 +297,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
291297

292298
# coerce ints to 64
293299
if data.dtype == 'int':
294-
300+
295301
# coerce ints to 64
296302
try:
297303
data = data.astype('int64')
@@ -322,7 +328,7 @@ def _try_convert_to_date(self, data):
322328
if issubclass(new_data.dtype.type,np.number):
323329
if not ((new_data == iNaT) | (new_data > 31536000000000000L)).all():
324330
return data, False
325-
331+
326332
try:
327333
new_data = to_datetime(new_data)
328334
except:
@@ -342,29 +348,35 @@ class SeriesParser(Parser):
342348
_default_orient = 'index'
343349

344350
def _parse_no_numpy(self):
345-
351+
346352
json = self.json
347353
orient = self.orient
348354
if orient == "split":
349355
decoded = dict((str(k), v)
350-
for k, v in loads(json).iteritems())
356+
for k, v in loads(
357+
json,
358+
precise_float=self.precise_float).iteritems())
351359
self.obj = Series(dtype=None, **decoded)
352360
else:
353-
self.obj = Series(loads(json), dtype=None)
361+
self.obj = Series(
362+
loads(json, precise_float=self.precise_float), dtype=None)
354363

355364
def _parse_numpy(self):
356365

357366
json = self.json
358367
orient = self.orient
359368
if orient == "split":
360-
decoded = loads(json, dtype=None, numpy=True)
369+
decoded = loads(json, dtype=None, numpy=True,
370+
precise_float=self.precise_float)
361371
decoded = dict((str(k), v) for k, v in decoded.iteritems())
362372
self.obj = Series(**decoded)
363373
elif orient == "columns" or orient == "index":
364374
self.obj = Series(*loads(json, dtype=None, numpy=True,
365-
labelled=True))
375+
labelled=True,
376+
precise_float=self.precise_float))
366377
else:
367-
self.obj = Series(loads(json, dtype=None, numpy=True))
378+
self.obj = Series(loads(json, dtype=None, numpy=True,
379+
precise_float=self.precise_float))
368380

369381
def _try_convert_types(self):
370382
if self.obj is None: return
@@ -381,34 +393,43 @@ def _parse_numpy(self):
381393
orient = self.orient
382394

383395
if orient == "columns":
384-
args = loads(json, dtype=None, numpy=True, labelled=True)
396+
args = loads(json, dtype=None, numpy=True, labelled=True,
397+
precise_float=self.precise_float)
385398
if args:
386399
args = (args[0].T, args[2], args[1])
387400
self.obj = DataFrame(*args)
388401
elif orient == "split":
389-
decoded = loads(json, dtype=None, numpy=True)
402+
decoded = loads(json, dtype=None, numpy=True,
403+
precise_float=self.precise_float)
390404
decoded = dict((str(k), v) for k, v in decoded.iteritems())
391405
self.obj = DataFrame(**decoded)
392406
elif orient == "values":
393-
self.obj = DataFrame(loads(json, dtype=None, numpy=True))
407+
self.obj = DataFrame(loads(json, dtype=None, numpy=True,
408+
precise_float=self.precise_float))
394409
else:
395-
self.obj = DataFrame(*loads(json, dtype=None, numpy=True, labelled=True))
410+
self.obj = DataFrame(*loads(json, dtype=None, numpy=True, labelled=True,
411+
precise_float=self.precise_float))
396412

397413
def _parse_no_numpy(self):
398414

399415
json = self.json
400416
orient = self.orient
401417

402418
if orient == "columns":
403-
self.obj = DataFrame(loads(json), dtype=None)
419+
self.obj = DataFrame(
420+
loads(json, precise_float=self.precise_float), dtype=None)
404421
elif orient == "split":
405422
decoded = dict((str(k), v)
406-
for k, v in loads(json).iteritems())
423+
for k, v in loads(
424+
json,
425+
precise_float=self.precise_float).iteritems())
407426
self.obj = DataFrame(dtype=None, **decoded)
408427
elif orient == "index":
409-
self.obj = DataFrame(loads(json), dtype=None).T
428+
self.obj = DataFrame(
429+
loads(json, precise_float=self.precise_float), dtype=None).T
410430
else:
411-
self.obj = DataFrame(loads(json), dtype=None)
431+
self.obj = DataFrame(
432+
loads(json, precise_float=self.precise_float), dtype=None)
412433

413434
def _try_convert_types(self):
414435
if self.obj is None: return

pandas/io/tests/test_json/test_pandas.py

+10
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,16 @@ def test_series_to_json_except(self):
289289
s = Series([1, 2, 3])
290290
self.assertRaises(ValueError, s.to_json, orient="garbage")
291291

292+
def test_series_from_json_precise_float(self):
293+
s = Series([4.56, 4.56, 4.56])
294+
result = read_json(s.to_json(), typ='series', precise_float=True)
295+
assert_series_equal(result, s)
296+
297+
def test_frame_from_json_precise_float(self):
298+
df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
299+
result = read_json(df.to_json(), precise_float=True)
300+
assert_frame_equal(result, df)
301+
292302
def test_typ(self):
293303

294304
s = Series(range(6), index=['a','b','c','d','e','f'], dtype='int64')

0 commit comments

Comments
 (0)