1
1
# pylint: disable-msg=E1101,W0613,W0603
2
+ from __future__ import absolute_import
2
3
3
4
import os
5
+ import json
4
6
import numpy as np
5
7
6
8
import pandas .json as _json
12
14
from pandas .core .common import AbstractMethodError
13
15
from pandas .formats .printing import pprint_thing
14
16
from .normalize import _convert_to_line_delimits
17
+ from .table_schema import build_table_schema
18
+ from pandas .types .common import is_period_dtype
15
19
16
20
loads = _json .loads
17
21
dumps = _json .dumps
18
22
23
+ TABLE_SCHEMA_VERSION = '0.20.0'
24
+
19
25
20
26
# interface to/from
21
27
def to_json (path_or_buf , obj , orient = None , date_format = 'epoch' ,
@@ -26,19 +32,22 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
26
32
raise ValueError (
27
33
"'lines' keyword only valid when 'orient' is records" )
28
34
29
- if isinstance (obj , Series ):
30
- s = SeriesWriter (
31
- obj , orient = orient , date_format = date_format ,
32
- double_precision = double_precision , ensure_ascii = force_ascii ,
33
- date_unit = date_unit , default_handler = default_handler ).write ()
35
+ if orient == 'table' and isinstance (obj , Series ):
36
+ obj = obj .to_frame (name = obj .name or 'values' )
37
+ if orient == 'table' and isinstance (obj , DataFrame ):
38
+ writer = JSONTableWriter
39
+ elif isinstance (obj , Series ):
40
+ writer = SeriesWriter
34
41
elif isinstance (obj , DataFrame ):
35
- s = FrameWriter (
36
- obj , orient = orient , date_format = date_format ,
37
- double_precision = double_precision , ensure_ascii = force_ascii ,
38
- date_unit = date_unit , default_handler = default_handler ).write ()
42
+ writer = FrameWriter
39
43
else :
40
44
raise NotImplementedError ("'obj' should be a Series or a DataFrame" )
41
45
46
+ s = writer (
47
+ obj , orient = orient , date_format = date_format ,
48
+ double_precision = double_precision , ensure_ascii = force_ascii ,
49
+ date_unit = date_unit , default_handler = default_handler ).write ()
50
+
42
51
if lines :
43
52
s = _convert_to_line_delimits (s )
44
53
@@ -81,7 +90,8 @@ def write(self):
81
90
ensure_ascii = self .ensure_ascii ,
82
91
date_unit = self .date_unit ,
83
92
iso_dates = self .date_format == 'iso' ,
84
- default_handler = self .default_handler )
93
+ default_handler = self .default_handler
94
+ )
85
95
86
96
87
97
class SeriesWriter (Writer ):
@@ -108,6 +118,49 @@ def _format_axes(self):
108
118
"'%s'." % self .orient )
109
119
110
120
121
class JSONTableWriter(FrameWriter):
    """
    Writer used for ``orient='table'``.

    The output of `write` is a JSON object with two entries: ``schema``
    (the result of `build_table_schema` on the original object) and
    ``data`` (the object, with its index reset, serialized by the parent
    writer using ``orient='records'`` and ISO dates).
    """
    _default_orient = 'records'

    def __init__(self, obj, orient, date_format, double_precision,
                 ensure_ascii, date_unit, default_handler=None):
        """
        Adds a `schema` attribute with the Table Schema, resets
        the index (can't do in caller, because the schema inference needs
        to know what the index is), forces orient to records, and forces
        date_format to 'iso'.

        Parameters are forwarded unchanged to the parent writer's
        constructor.

        Raises
        ------
        ValueError
            If `date_format` is anything other than ``'iso'``.
        """
        super(JSONTableWriter, self).__init__(
            obj, orient, date_format, double_precision, ensure_ascii,
            date_unit, default_handler=default_handler)

        if date_format != 'iso':
            msg = ("Trying to write with `orient='table'` and "
                   "`date_format='%s'`. Table Schema requires dates "
                   "to be formatted with `date_format='iso'`" % date_format)
            raise ValueError(msg)

        # The schema must be inferred from the object as passed in, i.e.
        # before the index is reset into ordinary columns below.
        self.schema = build_table_schema(obj)

        # TODO: Do this timedelta properly in objToJSON.c
        # See GH #15137
        # Work on a copy so the caller's object is never modified.
        obj = obj.copy()
        timedeltas = obj.select_dtypes(include=['timedelta']).columns
        # Only rewrite columns when there is something to convert; this
        # avoids assigning through an empty column selection.
        if len(timedeltas):
            obj[timedeltas] = obj[timedeltas].applymap(
                lambda x: x.isoformat())
        # Convert PeriodIndex to datetimes before serializing
        if is_period_dtype(obj.index):
            obj.index = obj.index.to_timestamp()

        # Override what the parent constructor stored: the index becomes
        # regular columns and the payload is always ISO-dated records.
        self.obj = obj.reset_index()
        self.date_format = 'iso'
        self.orient = 'records'

    def write(self):
        """
        Serialize to a JSON string of the form
        ``{"schema": <schema>, "data": <records>}``.

        The ``data`` part is whatever the parent writer's `write` returns;
        the ``schema`` part is `self.schema` dumped with the standard
        library `json` module, keeping the key order it was built with.
        """
        data = super(JSONTableWriter, self).write()
        serialized = '{{"schema": {}, "data": {}}}'.format(
            json.dumps(self.schema, sort_keys=False), data)
        return serialized
162
+
163
+
111
164
def read_json (path_or_buf = None , orient = None , typ = 'frame' , dtype = True ,
112
165
convert_axes = True , convert_dates = True , keep_default_dates = True ,
113
166
numpy = False , precise_float = False , date_unit = None , encoding = None ,
@@ -244,6 +297,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
244
297
col 1 col 2
245
298
0 a b
246
299
1 c d
300
+
247
301
"""
248
302
249
303
filepath_or_buffer , _ , _ = get_filepath_or_buffer (path_or_buf ,
0 commit comments