Skip to content

Commit 2697b49

Browse files
committed
ENH: added date_format param to to_json to allow epoch or iso formats (both of which
can be parsed with parse_dates=True in read_json)
1 parent 8e673cf commit 2697b49

File tree

4 files changed

+194
-55
lines changed

4 files changed

+194
-55
lines changed

doc/source/io.rst

+12-2
Original file line numberDiff line numberDiff line change
@@ -962,17 +962,27 @@ with optional parameters:
962962
* columns : dict like {column -> {index -> value}}
963963
* values : just the values array
964964

965+
- date_format : type of date conversion (epoch = epoch milliseconds, iso = ISO8601), default is epoch
965966
- double_precision : The number of decimal places to use when encoding floating point values, default 10.
966967
- force_ascii : force encoded string to be ASCII, default True.
967968

968-
Note NaN's and None will be converted to null and datetime objects will be converted to UNIX timestamps.
969+
Note that NaN's and None will be converted to null, and datetime objects will be converted based on the date_format parameter.
969970

970971
.. ipython:: python
971972
972973
dfj = DataFrame(randn(5, 2), columns=list('AB'))
973974
json = dfj.to_json()
974975
json
975976
977+
Writing in iso date format
978+
979+
.. ipython:: python
980+
981+
dfd = DataFrame(randn(5, 2), columns=list('AB'))
982+
dfd['date'] = Timestamp('20130101')
983+
json = dfd.to_json(date_format='iso')
984+
json
985+
976986
Writing to a file, with a date index and a date column
977987

978988
.. ipython:: python
@@ -1003,7 +1013,7 @@ is ``None``. To explicity force ``Series`` parsing, pass ``typ=series``
10031013

10041014
- dtype : dtype of the resulting object
10051015
- numpy : direct decoding to numpy arrays. default True but falls back to standard decoding if a problem occurs.
1006-
- parse_dates : a list of columns to parse for dates; If True, then try to parse datelike columns, default is True
1016+
- parse_dates : a list of columns to parse for dates; If True, then try to parse datelike columns, default is False
10071017
- keep_default_dates : boolean, default True. If parsing dates, then parse the default datelike columns
10081018

10091019
The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is

pandas/core/generic.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -495,8 +495,8 @@ def to_clipboard(self):
495495
from pandas.io import clipboard
496496
clipboard.to_clipboard(self)
497497

498-
def to_json(self, path_or_buf=None, orient=None, double_precision=10,
499-
force_ascii=True):
498+
def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
499+
double_precision=10, force_ascii=True):
500500
"""
501501
Convert the object to a JSON string.
502502
@@ -517,6 +517,8 @@ def to_json(self, path_or_buf=None, orient=None, double_precision=10,
517517
index : dict like {index -> {column -> value}}
518518
columns : dict like {column -> {index -> value}}
519519
values : just the values array
520+
date_format : type of date conversion (epoch = epoch milliseconds, iso = ISO8601),
521+
default is epoch
520522
double_precision : The number of decimal places to use when encoding
521523
floating point values, default 10.
522524
force_ascii : force encoded string to be ASCII, default True.
@@ -529,8 +531,8 @@ def to_json(self, path_or_buf=None, orient=None, double_precision=10,
529531
"""
530532

531533
from pandas.io import json
532-
return json.to_json(path_or_buf=path_or_buf, obj=self, orient=orient, double_precision=double_precision,
533-
force_ascii=force_ascii)
534+
return json.to_json(path_or_buf=path_or_buf, obj=self, orient=orient, date_format=date_format,
535+
double_precision=double_precision, force_ascii=force_ascii)
534536

535537
# install the indexerse
536538
for _name, _indexer in indexing.get_indexers_list():

pandas/io/json.py

+145-47
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,107 @@
1010
dumps = _json.dumps
1111

1212
import numpy as np
13+
from pandas.tslib import iNaT
1314

1415
### interface to/from ###
1516

16-
def to_json(path_or_buf, obj, orient=None, double_precision=10, force_ascii=True):
17+
def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision=10, force_ascii=True):
1718

19+
if isinstance(obj, Series):
20+
s = SeriesWriter(obj, orient=orient, date_format=date_format, double_precision=double_precision,
21+
ensure_ascii=force_ascii).write()
22+
elif isinstance(obj, DataFrame):
23+
s = FrameWriter(obj, orient=orient, date_format=date_format, double_precision=double_precision,
24+
ensure_ascii=force_ascii).write()
25+
else:
26+
raise NotImplementedError
27+
28+
if isinstance(path_or_buf, basestring):
29+
with open(path_or_buf,'w') as fh:
30+
fh.write(s)
31+
elif path_or_buf is None:
32+
return s
33+
else:
34+
path_or_buf.write(s)
35+
36+
class Writer(object):
37+
38+
def __init__(self, obj, orient, date_format, double_precision, ensure_ascii):
39+
self.obj = obj
40+
1841
if orient is None:
19-
if isinstance(obj, Series):
20-
orient = 'index'
21-
elif isinstance(obj, DataFrame):
22-
orient = 'columns'
23-
24-
s = dumps(obj, orient=orient, double_precision=double_precision,
25-
ensure_ascii=force_ascii)
26-
if isinstance(path_or_buf, basestring):
27-
with open(path_or_buf,'w') as fh:
28-
fh.write(s)
29-
elif path_or_buf is None:
30-
return s
42+
orient = self._default_orient
43+
44+
self.orient = orient
45+
self.date_format = date_format
46+
self.double_precision = double_precision
47+
self.ensure_ascii = ensure_ascii
48+
49+
self.is_copy = False
50+
self._format_axes()
51+
self._format_dates()
52+
53+
def _format_dates(self):
54+
raise NotImplementedError
55+
56+
def _format_axes(self):
57+
raise NotImplementedError
58+
59+
def _needs_to_date(self, data):
60+
return self.date_format == 'iso' and data.dtype == 'datetime64[ns]'
61+
62+
def _format_to_date(self, data):
63+
if self._needs_to_date(data):
64+
return data.apply(lambda x: x.isoformat())
65+
return data
66+
67+
def copy_if_needed(self):
68+
""" copy myself if necessary """
69+
if not self.is_copy:
70+
self.obj = self.obj.copy()
71+
self.is_copy = True
72+
73+
def write(self):
74+
return dumps(self.obj, orient=self.orient, double_precision=self.double_precision, ensure_ascii=self.ensure_ascii)
75+
76+
class SeriesWriter(Writer):
77+
_default_orient = 'index'
78+
79+
def _format_axes(self):
80+
if self._needs_to_date(self.obj.index):
81+
self.copy_if_needed()
82+
self.obj.index = self._format_to_date(self.obj.index.to_series())
83+
84+
def _format_dates(self):
85+
if self._needs_to_date(self.obj):
86+
self.copy_if_needed()
87+
self.obj = self._format_to_date(self.obj)
88+
89+
class FrameWriter(Writer):
90+
_default_orient = 'columns'
91+
92+
def _format_axes(self):
93+
""" try to axes if they are datelike """
94+
if self.orient == 'columns':
95+
axis = 'index'
96+
elif self.orient == 'index':
97+
axis = 'columns'
3198
else:
32-
path_or_buf.write(s)
99+
return
100+
101+
a = getattr(self.obj,axis)
102+
if self._needs_to_date(a):
103+
self.copy_if_needed()
104+
setattr(self.obj,axis,self._format_to_date(a.to_series()))
105+
106+
def _format_dates(self):
107+
if self.date_format == 'iso':
108+
dtypes = self.obj.dtypes
109+
dtypes = dtypes[dtypes == 'datetime64[ns]']
110+
if len(dtypes):
111+
self.copy_if_needed()
112+
for c in dtypes.index:
113+
self.obj[c] = self._format_to_date(self.obj[c])
33114

34115
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, numpy=True,
35116
parse_dates=False, keep_default_dates=True):
@@ -79,12 +160,11 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, numpy=True
79160
obj = FrameParser(json, orient, dtype, numpy, parse_dates, keep_default_dates).parse()
80161

81162
if typ == 'series' or obj is None:
82-
obj = SeriesParser(json, orient, dtype, numpy).parse()
163+
obj = SeriesParser(json, orient, dtype, numpy, parse_dates, keep_default_dates).parse()
83164

84165
return obj
85166

86167
class Parser(object):
87-
_min_date = 31536000000000000L
88168

89169
def __init__(self, json, orient, dtype, numpy, parse_dates=False, keep_default_dates=False):
90170
self.json = json
@@ -106,12 +186,43 @@ def __init__(self, json, orient, dtype, numpy, parse_dates=False, keep_default_d
106186
def parse(self):
107187
self._parse()
108188
if self.obj is not None:
109-
self.convert_axes()
189+
self._convert_axes()
110190
if self.parse_dates:
111-
self.try_parse_dates()
191+
self._try_parse_dates()
112192
return self.obj
113193

114-
def try_parse_dates(self):
194+
195+
def _try_parse_to_date(self, data):
196+
""" try to parse a ndarray like into a date column
197+
try to coerce object in epoch/iso formats and
198+
integer/float in epcoh formats """
199+
200+
new_data = data
201+
if new_data.dtype == 'object':
202+
try:
203+
new_data = data.astype('int64')
204+
except:
205+
pass
206+
207+
208+
# ignore numbers that are out of range
209+
if issubclass(new_data.dtype.type,np.number):
210+
if not ((new_data == iNaT) | (new_data > 31536000000000000L)).all():
211+
return data
212+
213+
try:
214+
new_data = to_datetime(new_data)
215+
except:
216+
try:
217+
new_data = to_datetime(new_data.astype('int64'))
218+
except:
219+
220+
# return old, noting more we can do
221+
new_data = data
222+
223+
return new_data
224+
225+
def _try_parse_dates(self):
115226
raise NotImplementedError
116227

117228
class SeriesParser(Parser):
@@ -146,15 +257,19 @@ def _parse(self):
146257
else:
147258
self.obj = Series(loads(json), dtype=dtype)
148259

149-
def convert_axes(self):
260+
def _convert_axes(self):
150261
""" try to axes if they are datelike """
151-
if self.obj is None: return
152-
153262
try:
154-
self.obj.index = to_datetime(self.obj.index.astype('int64'))
263+
self.obj.index = self._try_parse_to_date(self.obj.index)
155264
except:
156265
pass
157266

267+
def _try_parse_dates(self):
268+
if self.obj is None: return
269+
270+
if self.parse_dates:
271+
self.obj = self._try_parse_to_date(self.obj)
272+
158273
class FrameParser(Parser):
159274
_default_orient = 'columns'
160275

@@ -196,10 +311,8 @@ def _parse(self):
196311
else:
197312
self.obj = DataFrame(loads(json), dtype=dtype)
198313

199-
def convert_axes(self):
314+
def _convert_axes(self):
200315
""" try to axes if they are datelike """
201-
if self.obj is None: return
202-
203316
if self.orient == 'columns':
204317
axis = 'index'
205318
elif self.orient == 'index':
@@ -208,18 +321,12 @@ def convert_axes(self):
208321
return
209322

210323
try:
211-
a = getattr(self.obj,axis).astype('int64')
212-
if (a>self._min_date).all():
213-
setattr(self.obj,axis,to_datetime(a))
324+
a = getattr(self.obj,axis)
325+
setattr(self.obj,axis,self._try_parse_to_date(a))
214326
except:
215327
pass
216328

217-
def try_parse_dates(self):
218-
"""
219-
try to parse out dates
220-
these are only in in64 columns
221-
"""
222-
329+
def _try_parse_dates(self):
223330
if self.obj is None: return
224331

225332
# our columns to parse
@@ -228,13 +335,10 @@ def try_parse_dates(self):
228335
parse_dates = []
229336
parse_dates = set(parse_dates)
230337

231-
def is_ok(col, c):
338+
def is_ok(col):
232339
""" return if this col is ok to try for a date parse """
233340
if not isinstance(col, basestring): return False
234341

235-
if issubclass(c.dtype.type,np.number) and (c<self._min_date).all():
236-
return False
237-
238342
if (col.endswith('_at') or
239343
col.endswith('_time') or
240344
col.lower() == 'modified' or
@@ -245,11 +349,5 @@ def is_ok(col, c):
245349

246350

247351
for col, c in self.obj.iteritems():
248-
if (self.keep_default_dates and is_ok(col, c)) or col in parse_dates:
249-
try:
250-
self.obj[col] = to_datetime(c)
251-
except:
252-
try:
253-
self.obj[col] = to_datetime(c.astype('int64'))
254-
except:
255-
pass
352+
if (self.keep_default_dates and is_ok(col)) or col in parse_dates:
353+
self.obj[col] = self._try_parse_to_date(c)

pandas/io/tests/test_json/test_pandas.py

100755100644
+31-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232

3333
_mixed_frame = _frame.copy()
3434

35-
3635
class TestPandasObjects(unittest.TestCase):
3736

3837
def setUp(self):
@@ -266,13 +265,19 @@ def test_path(self):
266265

267266
def test_axis_dates(self):
268267

269-
# axis conversion
268+
# frame
270269
json = self.tsframe.to_json()
271270
result = read_json(json)
272271
assert_frame_equal(result,self.tsframe)
273272

273+
# series
274+
json = self.ts.to_json()
275+
result = read_json(json,typ='series')
276+
assert_series_equal(result,self.ts)
277+
274278
def test_parse_dates(self):
275279

280+
# frame
276281
df = self.tsframe.copy()
277282
df['date'] = Timestamp('20130101')
278283

@@ -285,6 +290,30 @@ def test_parse_dates(self):
285290
result = read_json(json,parse_dates=True)
286291
assert_frame_equal(result,df)
287292

293+
# series
294+
ts = Series(Timestamp('20130101'),index=self.ts.index)
295+
json = ts.to_json()
296+
result = read_json(json,typ='series',parse_dates=True)
297+
assert_series_equal(result,ts)
298+
299+
def test_date_format(self):
300+
301+
df = self.tsframe.copy()
302+
df['date'] = Timestamp('20130101')
303+
df_orig = df.copy()
304+
305+
json = df.to_json(date_format='iso')
306+
result = read_json(json,parse_dates=True)
307+
assert_frame_equal(result,df_orig)
308+
309+
# make sure that we did in fact copy
310+
assert_frame_equal(df,df_orig)
311+
312+
ts = Series(Timestamp('20130101'),index=self.ts.index)
313+
json = ts.to_json(date_format='iso')
314+
result = read_json(json,typ='series',parse_dates=True)
315+
assert_series_equal(result,ts)
316+
288317
@network
289318
@slow
290319
def test_url(self):

0 commit comments

Comments
 (0)