2
2
3
3
import os
4
4
import copy
5
+ import json
5
6
from collections import defaultdict
6
7
import numpy as np
7
8
11
12
from pandas import compat , isnull
12
13
from pandas import Series , DataFrame , to_datetime
13
14
from pandas .io .common import get_filepath_or_buffer , _get_handle
15
+ from pandas .core import config
14
16
from pandas .core .common import AbstractMethodError
15
17
from pandas .formats .printing import pprint_thing
16
-
18
+ from pandas .types .common import (
19
+ is_integer_dtype , is_timedelta64_dtype , is_string_dtype , is_numeric_dtype ,
20
+ is_bool_dtype , is_datetime64_dtype
21
+ )
17
22
loads = _json .loads
18
23
dumps = _json .dumps
19
24
@@ -61,6 +66,22 @@ def __init__(self, obj, orient, date_format, double_precision,
61
66
if orient is None :
62
67
orient = self ._default_orient
63
68
69
+ self .is_jsontable_schema = orient == 'jsontable_schema'
70
+ if self .is_jsontable_schema :
71
+ self .schema = to_json_schema (obj )
72
+
73
+ # XXX: Do this timedelta properly in to_json
74
+ sample = obj .head (
75
+ config .get_option ('display.max_rows' )).reset_index ()
76
+ timedeltas = sample .select_dtypes (include = ['timedelta' ]).columns
77
+ sample [timedeltas ] = sample [timedeltas ].applymap (isoformat )
78
+ self .obj = sample
79
+ date_format = 'iso' # ignoring user input, but epoch not allowed
80
+ orient = 'records'
81
+
82
+ else :
83
+ self .schema = None
84
+
64
85
self .orient = orient
65
86
self .date_format = date_format
66
87
self .double_precision = double_precision
@@ -75,14 +96,19 @@ def _format_axes(self):
75
96
raise AbstractMethodError (self )
76
97
77
98
def write(self):
    """
    Serialise ``self.obj`` to a JSON string.

    The object is passed to the module-level ``dumps`` together with the
    writer's stored options. When the writer was created for the
    ``'jsontable_schema'`` orient, the serialised records are wrapped in an
    envelope of the form ``{"schema": <schema>, "data": <records>}``.

    Returns
    -------
    str
    """
    use_iso_dates = self.date_format == 'iso'
    body = dumps(
        self.obj,
        orient=self.orient,
        double_precision=self.double_precision,
        ensure_ascii=self.ensure_ascii,
        date_unit=self.date_unit,
        iso_dates=use_iso_dates,
        default_handler=self.default_handler,
    )
    if not self.is_jsontable_schema:
        return body

    # The schema is a plain dict, so the standard-library encoder is used
    # for it; ``body`` is already JSON text and is spliced in verbatim.
    envelope = '{{"schema": {}, "data": {}}}'
    return envelope.format(json.dumps(self.schema), body)
86
112
87
113
88
114
class SeriesWriter (Writer ):
@@ -884,10 +910,6 @@ def _recursive_extract(data, path, seen_meta, level=0):
884
910
885
911
return result
886
912
887
- # ---------------------------------------------------------------------
888
- # JSON-Table Schema routines
889
- # http://specs.frictionlessdata.io/json-table-schema/
890
-
891
913
892
914
# TODO: Make method on Timedelta?
893
915
def isoformat (x ):
@@ -909,3 +931,134 @@ def isoformat(x):
909
931
tpl = 'Pn{td.days}Tn{td.hours}n{td.minutes}n{seconds}' .format (
910
932
td = components , seconds = seconds )
911
933
return tpl
934
+
935
+ # ---------------------------------------------------------------------
936
+ # JSON-Table Schema routines
937
+ # http://specs.frictionlessdata.io/json-table-schema/
938
+
939
+
940
def as_jsontable_type(x):
    """
    Convert a NumPy / pandas type to its corresponding jsontable type

    =============== ======================
    Pandas type     JSON Table Schema type
    =============== ======================
    int64           integer
    float64         number
    bool            boolean
    datetime64[ns]  date
    timedelta64[ns] duration
    =============== ======================

    Timezone-aware datetimes are also reported as ``'date'``. String
    dtypes map to ``'string'`` and anything not matched by one of the
    checks falls back to ``'any'``.

    Parameters
    ----------
    x : dtype or array-like
        Anything accepted by the pandas ``is_*_dtype`` helpers.

    Returns
    -------
    str
        One of ``'integer'``, ``'boolean'``, ``'number'``, ``'date'``,
        ``'duration'``, ``'string'`` or ``'any'``.
    """
    # Imported locally: the module-level import list only brings in
    # ``is_datetime64_dtype``, which is False for tz-aware datetimes.
    from pandas.api.types import is_datetime64_any_dtype

    # Order matters: the integer and boolean checks must run before the
    # broader numeric check so they are not reported as 'number'.
    if is_integer_dtype(x):
        return 'integer'
    if is_bool_dtype(x):
        return 'boolean'
    if is_numeric_dtype(x):
        return 'number'
    # NOTE(review): the Table Schema spec also defines 'datetime'; 'date' is
    # kept here so existing consumers of this schema are not broken.
    if is_datetime64_any_dtype(x):
        return 'date'
    if is_timedelta64_dtype(x):
        return 'duration'
    if is_string_dtype(x):
        return 'string'
    return 'any'
968
+
969
+
970
+ def _set_default_names (data ):
971
+ """Sets index names to 'index' for regular, or 'level_x' for Multi"""
972
+ if all (name is not None for name in data .index .names ):
973
+ return data
974
+
975
+ data = data .copy ()
976
+ if data .index .nlevels > 1 :
977
+ names = [name if name is not None else 'level_{}' .format (i )
978
+ for i , name in enumerate (data .index .names )]
979
+ data .index .names = names
980
+ else :
981
+ data .index .name = 'index'
982
+ return data
983
+
984
+
985
def to_json_schema(data, index=True, primary_key=None):
    """
    Create a JSON Table schema from ``data``.

    Parameters
    ----------
    data : Series, DataFrame
    index : bool
        Whether to include ``data.index`` in the schema.
    primary_key : str, list of str, or None
        Column names to designate as the primary key. Any value other
        than None is stored verbatim under ``'primary_key'``.
        The default `None` will set `'primary_key'` to the index
        level or levels if the index is included and unique.

    Returns
    -------
    schema : dict
        Always has a ``'fields'`` list of ``{'name', 'type'}`` dicts, and
        a ``'primary_key'`` entry when one was given or could be derived
        from the index.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {'A': [1, 2, 3],
    ...      'B': ['a', 'b', 'c'],
    ...      'C': pd.date_range('2016-01-01', freq='d', periods=3),
    ...     }, index=pd.Index(range(3), name='idx'))
    >>> pd.to_json_schema(df)
    {'fields': [{'name': 'idx', 'type': 'integer'},
                {'name': 'A', 'type': 'integer'},
                {'name': 'B', 'type': 'string'},
                {'name': 'C', 'type': 'date'}],
     'primary_key': 'idx'}

    Notes
    -----
    See `as_jsontable_type` for conversion types.
    Timedeltas are converted to ISO8601 duration format with
    9 decimal places after the seconds field for nanosecond precision.
    """
    # Truthiness (not ``is True``) so this agrees with the checks below;
    # otherwise a truthy non-True ``index`` would emit an unnamed field.
    if index:
        data = _set_default_names(data)

    fields = []

    if index:
        if data.index.nlevels > 1:
            # Take names from ``index.names`` rather than from the level
            # objects, so the result does not depend on levels carrying
            # their own names.
            for level, name in zip(data.index.levels, data.index.names):
                fields.append({'name': name,
                               'type': as_jsontable_type(level.dtype)})
        else:
            fields.append({'name': data.index.name,
                           'type': as_jsontable_type(data.index.dtype)})

    if data.ndim > 1:
        # Pair labels with dtypes directly instead of ``iteritems``.
        dtypes = data.dtypes
        for column, type_ in zip(dtypes.index, dtypes):
            fields.append({'name': column,
                           'type': as_jsontable_type(type_)})
    else:
        fields.append({
            'name': data.name if data.name is not None else 'values',
            'type': as_jsontable_type(data.dtype)})

    schema = {'fields': fields}

    if primary_key is not None:
        schema['primary_key'] = primary_key
    elif index and data.index.is_unique:
        # TODO: Always a list, spec allows for a string scalar.
        if data.index.nlevels == 1:
            schema['primary_key'] = data.index.name
        else:
            # Plain list so no pandas container leaks into the schema.
            schema['primary_key'] = list(data.index.names)
    return schema
1057
+
1058
+
1059
def publish_tableschema(data):
    """
    Temporary helper for testing w/ frontend.

    Serialises ``data`` with ``orient='jsontable_schema'`` and hands the
    result to IPython's ``display`` as a raw mimebundle keyed by the table
    schema mimetype. IPython is imported lazily inside the function.
    """
    from IPython.display import display

    bundle = {
        'application/vnd.tableschema.v1+json':
            data.to_json(orient='jsontable_schema'),
    }
    display(bundle, raw=True)
0 commit comments