1
1
# pylint: disable-msg=E1101,W0613,W0603
2
+ from __future__ import absolute_import
2
3
3
4
import os
4
5
import numpy as np
12
13
from pandas .core .common import AbstractMethodError
13
14
from pandas .formats .printing import pprint_thing
14
15
from .normalize import _convert_to_line_delimits
16
+ from .table_schema import build_table_schema
17
+ from pandas .types .common import is_period_dtype
15
18
16
19
loads = _json .loads
17
20
dumps = _json .dumps
18
21
22
+ TABLE_SCHEMA_VERSION = '0.20.0'
23
+
19
24
20
25
# interface to/from
21
26
def to_json (path_or_buf , obj , orient = None , date_format = 'epoch' ,
@@ -26,19 +31,22 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
26
31
raise ValueError (
27
32
"'lines' keyword only valid when 'orient' is records" )
28
33
29
- if isinstance (obj , Series ):
30
- s = SeriesWriter (
31
- obj , orient = orient , date_format = date_format ,
32
- double_precision = double_precision , ensure_ascii = force_ascii ,
33
- date_unit = date_unit , default_handler = default_handler ).write ()
34
+ if orient == 'table' and isinstance (obj , Series ):
35
+ obj = obj .to_frame (name = obj .name or 'values' )
36
+ if orient == 'table' and isinstance (obj , DataFrame ):
37
+ writer = JSONTableWriter
38
+ elif isinstance (obj , Series ):
39
+ writer = SeriesWriter
34
40
elif isinstance (obj , DataFrame ):
35
- s = FrameWriter (
36
- obj , orient = orient , date_format = date_format ,
37
- double_precision = double_precision , ensure_ascii = force_ascii ,
38
- date_unit = date_unit , default_handler = default_handler ).write ()
41
+ writer = FrameWriter
39
42
else :
40
43
raise NotImplementedError ("'obj' should be a Series or a DataFrame" )
41
44
45
+ s = writer (
46
+ obj , orient = orient , date_format = date_format ,
47
+ double_precision = double_precision , ensure_ascii = force_ascii ,
48
+ date_unit = date_unit , default_handler = default_handler ).write ()
49
+
42
50
if lines :
43
51
s = _convert_to_line_delimits (s )
44
52
@@ -81,7 +89,8 @@ def write(self):
81
89
ensure_ascii = self .ensure_ascii ,
82
90
date_unit = self .date_unit ,
83
91
iso_dates = self .date_format == 'iso' ,
84
- default_handler = self .default_handler )
92
+ default_handler = self .default_handler
93
+ )
85
94
86
95
87
96
class SeriesWriter (Writer ):
@@ -108,6 +117,50 @@ def _format_axes(self):
108
117
"'%s'." % self .orient )
109
118
110
119
120
class JSONTableWriter(FrameWriter):
    """
    Frame writer used for ``orient='table'``.

    Serializes to a JSON object holding a Table Schema description under
    ``"schema"`` and the record-oriented rows under ``"data"``.
    """
    _default_orient = 'records'

    def __init__(self, obj, orient, date_format, double_precision,
                 ensure_ascii, date_unit, default_handler=None):
        """
        Prepare ``obj`` for Table Schema output.

        Adds a ``schema`` attribute holding the Table Schema, resets the
        index (this cannot be done in the caller, because the schema
        inference needs to know what the index is), forces ``orient`` to
        'records' and forces ``date_format`` to 'iso'.

        Raises
        ------
        ValueError
            If ``date_format`` is anything other than 'iso'.
        """
        super(JSONTableWriter, self).__init__(
            obj, orient, date_format, double_precision, ensure_ascii,
            date_unit, default_handler=default_handler)

        if date_format != 'iso':
            raise ValueError(
                "Trying to write with `orient='table'` and "
                "`date_format='%s'`. Table Schema requires dates "
                "to be formatted with `date_format='iso'`" % date_format)

        # The schema has to be inferred while the index is still in place.
        self.schema = build_table_schema(obj)

        def _as_iso(value):
            return value.isoformat()

        # TODO: Do this timedelta properly in objToJSON.c
        # See GH #15137
        # Work on a copy so the assignments below do not touch the
        # object that was passed in.
        frame = obj.copy()
        timedelta_cols = frame.select_dtypes(include=['timedelta']).columns
        if len(timedelta_cols) > 0:
            frame[timedelta_cols] = frame[timedelta_cols].applymap(_as_iso)

        # Convert PeriodIndex to datetimes before serializing
        if is_period_dtype(frame.index):
            frame.index = frame.index.to_timestamp()

        self.obj = frame.reset_index()
        self.date_format = 'iso'
        self.orient = 'records'

    def write(self):
        """
        Return the serialized table as a JSON string of the form
        ``{"schema": <schema>, "data": <records>}``.
        """
        records_json = super(JSONTableWriter, self).write()
        schema_json = dumps(self.schema)
        return '{{"schema": {}, "data": {}}}'.format(schema_json,
                                                     records_json)
162
+
163
+
111
164
def read_json (path_or_buf = None , orient = None , typ = 'frame' , dtype = True ,
112
165
convert_axes = True , convert_dates = True , keep_default_dates = True ,
113
166
numpy = False , precise_float = False , date_unit = None , encoding = None ,
@@ -244,6 +297,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
244
297
col 1 col 2
245
298
0 a b
246
299
1 c d
300
+
247
301
"""
248
302
249
303
filepath_or_buffer , _ , _ = get_filepath_or_buffer (path_or_buf ,
0 commit comments