10
10
dumps = _json .dumps
11
11
12
12
import numpy as np
13
+ from pandas .tslib import iNaT
13
14
14
15
### interface to/from ###
15
16
16
def to_json(path_or_buf, obj, orient=None, date_format='epoch',
            double_precision=10, force_ascii=True):
    """Serialize a Series or DataFrame to JSON.

    Parameters
    ----------
    path_or_buf : string path, object with a ``write`` method, or None
        Where to send the JSON text.  A string is opened as a file in
        ``'w'`` mode; None means "return the text"; anything else has its
        ``write`` method called with the text.
    obj : Series or DataFrame
        The object to serialize.
    orient : optional
        Forwarded to the writer; None selects the writer's default orient.
    date_format : default 'epoch'
        Forwarded to the writer.
    double_precision : default 10
        Forwarded to the writer.
    force_ascii : default True
        Forwarded to the writer as ``ensure_ascii``.

    Returns
    -------
    The JSON text when ``path_or_buf`` is None, otherwise None.

    Raises
    ------
    NotImplementedError
        If ``obj`` is neither a Series nor a DataFrame.
    """
    if isinstance(obj, Series):
        writer_cls = SeriesWriter
    elif isinstance(obj, DataFrame):
        writer_cls = FrameWriter
    else:
        # Same exception type as before, now carrying the offending type so
        # the failure is diagnosable.
        raise NotImplementedError(
            "to_json is not implemented for objects of type %r"
            % type(obj).__name__)

    s = writer_cls(obj, orient=orient, date_format=date_format,
                   double_precision=double_precision,
                   ensure_ascii=force_ascii).write()

    if isinstance(path_or_buf, basestring):
        with open(path_or_buf, 'w') as fh:
            fh.write(s)
    elif path_or_buf is None:
        return s
    else:
        path_or_buf.write(s)
35
+
36
class Writer(object):
    """Base helper that prepares a pandas object and dumps it as JSON.

    Subclasses provide ``_default_orient`` and implement ``_format_axes``
    and ``_format_dates``; both hooks are invoked from ``__init__``.
    """

    def __init__(self, obj, orient, date_format, double_precision, ensure_ascii):
        self.obj = obj

        # fall back to the subclass default only when no orient was given
        self.orient = self._default_orient if orient is None else orient
        self.date_format = date_format
        self.double_precision = double_precision
        self.ensure_ascii = ensure_ascii

        # tracks whether self.obj has already been copied by copy_if_needed
        self.is_copy = False
        self._format_axes()
        self._format_dates()

    def _format_dates(self):
        """ hook: format date-like values (implemented by subclasses) """
        raise NotImplementedError

    def _format_axes(self):
        """ hook: format date-like axes (implemented by subclasses) """
        raise NotImplementedError

    def _needs_to_date(self, data):
        """ only 'iso' output of datetime64[ns] data needs formatting """
        if self.date_format != 'iso':
            return False
        return data.dtype == 'datetime64[ns]'

    def _format_to_date(self, data):
        """ return data as isoformat strings when needed, else data itself """
        if not self._needs_to_date(data):
            return data
        return data.apply(lambda stamp: stamp.isoformat())

    def copy_if_needed(self):
        """ replace self.obj with a copy, at most once """
        if self.is_copy:
            return
        self.obj = self.obj.copy()
        self.is_copy = True

    def write(self):
        """ dump the (possibly reformatted) object to a JSON string """
        return dumps(
            self.obj,
            orient=self.orient,
            double_precision=self.double_precision,
            ensure_ascii=self.ensure_ascii,
        )
75
+
76
class SeriesWriter(Writer):
    """Writer for Series objects; the default orient is 'index'."""
    _default_orient = 'index'

    def _format_axes(self):
        """ format the index as iso strings if it needs it """
        if not self._needs_to_date(self.obj.index):
            return
        self.copy_if_needed()
        self.obj.index = self._format_to_date(self.obj.index.to_series())

    def _format_dates(self):
        """ format the values as iso strings if they need it """
        if not self._needs_to_date(self.obj):
            return
        self.copy_if_needed()
        self.obj = self._format_to_date(self.obj)
88
+
89
class FrameWriter(Writer):
    """Writer for DataFrame objects; the default orient is 'columns'."""
    _default_orient = 'columns'

    def _format_axes(self):
        """ try to format the relevant axis if it is datelike """
        orient = self.orient
        if orient != 'columns' and orient != 'index':
            # no axis is formatted for any other orient
            return
        axis = 'index' if orient == 'columns' else 'columns'

        labels = getattr(self.obj, axis)
        if not self._needs_to_date(labels):
            return
        self.copy_if_needed()
        setattr(self.obj, axis, self._format_to_date(labels.to_series()))

    def _format_dates(self):
        """ format every datetime64[ns] column when writing 'iso' dates """
        if self.date_format != 'iso':
            return

        col_dtypes = self.obj.dtypes
        date_cols = col_dtypes[col_dtypes == 'datetime64[ns]']
        if not len(date_cols):
            return

        self.copy_if_needed()
        for name in date_cols.index:
            self.obj[name] = self._format_to_date(self.obj[name])
33
114
34
115
def read_json (path_or_buf = None , orient = None , typ = 'frame' , dtype = None , numpy = True ,
35
116
parse_dates = False , keep_default_dates = True ):
@@ -79,12 +160,11 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, numpy=True
79
160
obj = FrameParser (json , orient , dtype , numpy , parse_dates , keep_default_dates ).parse ()
80
161
81
162
if typ == 'series' or obj is None :
82
- obj = SeriesParser (json , orient , dtype , numpy ).parse ()
163
+ obj = SeriesParser (json , orient , dtype , numpy , parse_dates , keep_default_dates ).parse ()
83
164
84
165
return obj
85
166
86
167
class Parser (object ):
87
- _min_date = 31536000000000000L
88
168
89
169
def __init__ (self , json , orient , dtype , numpy , parse_dates = False , keep_default_dates = False ):
90
170
self .json = json
@@ -106,12 +186,43 @@ def __init__(self, json, orient, dtype, numpy, parse_dates=False, keep_default_d
106
186
def parse(self):
    """ run the parse, then the axis and (if requested) date conversions

    returns self.obj as left by those steps
    """
    self._parse()

    parsed_something = self.obj is not None
    if parsed_something:
        self._convert_axes()

    if self.parse_dates:
        self._try_parse_dates()

    return self.obj
113
193
114
- def try_parse_dates (self ):
194
+
195
def _try_parse_to_date(self, data):
    """ try to parse an ndarray-like into a date column

    Object data is first tentatively cast to int64.  Numeric data is only
    considered when every value is either iNaT or larger than the minimum
    stamp below; otherwise the input is returned untouched.  Conversion is
    then attempted with to_datetime on the data and, failing that, on its
    int64 cast.

    Returns the converted data, or the original ``data`` object when no
    conversion is possible.
    """
    # equals 365 * 86400 * 10**9; numbers at or below this are treated as
    # ordinary numbers rather than date stamps
    min_stamp = 31536000000000000

    new_data = data
    if new_data.dtype == 'object':
        try:
            new_data = data.astype('int64')
        except Exception:
            # not integer-like: keep the object data and let to_datetime
            # have a go at it below
            pass

    # ignore numbers that are out of range
    if issubclass(new_data.dtype.type, np.number):
        in_range = (new_data == iNaT) | (new_data > min_stamp)
        if not in_range.all():
            return data

    # best-effort conversion; 'except Exception' (rather than a bare
    # except) lets KeyboardInterrupt / SystemExit propagate
    try:
        return to_datetime(new_data)
    except Exception:
        pass

    try:
        return to_datetime(new_data.astype('int64'))
    except Exception:
        # return old, nothing more we can do
        return data
224
+
225
def _try_parse_dates(self):
    """ hook: parse date-like data on self.obj (implemented by subclasses) """
    raise NotImplementedError()
116
227
117
228
class SeriesParser (Parser ):
@@ -146,15 +257,19 @@ def _parse(self):
146
257
else :
147
258
self .obj = Series (loads (json ), dtype = dtype )
148
259
149
def _convert_axes(self):
    """ try to convert the index to dates if it is datelike

    Best effort: if the conversion or the assignment fails, the index is
    left as it was.
    """
    try:
        self.obj.index = self._try_parse_to_date(self.obj.index)
    except Exception:
        # deliberately ignored; narrower than a bare except so that
        # KeyboardInterrupt / SystemExit still propagate
        pass
157
266
267
def _try_parse_dates(self):
    """ try to convert the parsed values to dates when parse_dates is set """
    if self.obj is not None and self.parse_dates:
        self.obj = self._try_parse_to_date(self.obj)
272
+
158
273
class FrameParser (Parser ):
159
274
_default_orient = 'columns'
160
275
@@ -196,10 +311,8 @@ def _parse(self):
196
311
else :
197
312
self .obj = DataFrame (loads (json ), dtype = dtype )
198
313
199
- def convert_axes (self ):
314
+ def _convert_axes (self ):
200
315
""" try to axes if they are datelike """
201
- if self .obj is None : return
202
-
203
316
if self .orient == 'columns' :
204
317
axis = 'index'
205
318
elif self .orient == 'index' :
@@ -208,18 +321,12 @@ def convert_axes(self):
208
321
return
209
322
210
323
try :
211
- a = getattr (self .obj ,axis ).astype ('int64' )
212
- if (a > self ._min_date ).all ():
213
- setattr (self .obj ,axis ,to_datetime (a ))
324
+ a = getattr (self .obj ,axis )
325
+ setattr (self .obj ,axis ,self ._try_parse_to_date (a ))
214
326
except :
215
327
pass
216
328
217
- def try_parse_dates (self ):
218
- """
219
- try to parse out dates
220
- these are only in in64 columns
221
- """
222
-
329
+ def _try_parse_dates (self ):
223
330
if self .obj is None : return
224
331
225
332
# our columns to parse
@@ -228,13 +335,10 @@ def try_parse_dates(self):
228
335
parse_dates = []
229
336
parse_dates = set (parse_dates )
230
337
231
- def is_ok (col , c ):
338
+ def is_ok (col ):
232
339
""" return if this col is ok to try for a date parse """
233
340
if not isinstance (col , basestring ): return False
234
341
235
- if issubclass (c .dtype .type ,np .number ) and (c < self ._min_date ).all ():
236
- return False
237
-
238
342
if (col .endswith ('_at' ) or
239
343
col .endswith ('_time' ) or
240
344
col .lower () == 'modified' or
@@ -245,11 +349,5 @@ def is_ok(col, c):
245
349
246
350
247
351
for col , c in self .obj .iteritems ():
248
- if (self .keep_default_dates and is_ok (col , c )) or col in parse_dates :
249
- try :
250
- self .obj [col ] = to_datetime (c )
251
- except :
252
- try :
253
- self .obj [col ] = to_datetime (c .astype ('int64' ))
254
- except :
255
- pass
352
+ if (self .keep_default_dates and is_ok (col )) or col in parse_dates :
353
+ self .obj [col ] = self ._try_parse_to_date (c )
0 commit comments