2
2
3
3
import os
4
4
import copy
5
+ import json
5
6
from collections import defaultdict
6
7
import numpy as np
7
8
11
12
from pandas import compat , isnull
12
13
from pandas import Series , DataFrame , to_datetime
13
14
from pandas .io .common import get_filepath_or_buffer , _get_handle
15
+ from pandas .core import config
14
16
from pandas .core .common import AbstractMethodError
15
17
from pandas .formats .printing import pprint_thing
16
-
18
+ from pandas .types .common import (
19
+ is_integer_dtype , is_timedelta64_dtype , is_string_dtype , is_numeric_dtype ,
20
+ is_bool_dtype , is_datetime64_dtype
21
+ )
17
22
# Module-level alias for the ``loads`` function of the ``_json`` backend
# (``_json`` is bound outside the lines visible here).
loads = _json .loads
18
23
# Module-level alias for the ``_json`` backend's ``dumps``; the ``write``
# method further down serializes through this name.
dumps = _json .dumps
19
24
@@ -61,6 +66,22 @@ def __init__(self, obj, orient, date_format, double_precision,
61
66
if orient is None :
62
67
orient = self ._default_orient
63
68
69
+ self .is_jsontable_schema = orient == 'jsontable_schema'
70
+ if self .is_jsontable_schema :
71
+ self .schema = to_json_schema (obj )
72
+
73
+ # XXX: Do this timedelta properly in to_json
74
+ sample = obj .head (
75
+ config .get_option ('display.max_rows' )).reset_index ()
76
+ timedeltas = sample .select_dtypes (include = ['timedelta' ]).columns
77
+ sample [timedeltas ] = sample [timedeltas ].applymap (isoformat )
78
+ self .obj = sample
79
+ date_format = 'iso' # ignoring user input, but epoch not allowed
80
+ orient = 'records'
81
+
82
+ else :
83
+ self .schema = None
84
+
64
85
self .orient = orient
65
86
self .date_format = date_format
66
87
self .double_precision = double_precision
@@ -75,14 +96,19 @@ def _format_axes(self):
75
96
raise AbstractMethodError (self )
76
97
77
98
def write(self):
    """
    Serialize ``self.obj`` to JSON text.

    The options stored on the writer (orient, double precision, ASCII
    escaping, date unit, default handler) are forwarded to the module-level
    ``dumps``; ISO date output is requested when ``self.date_format`` is
    ``'iso'``.

    When ``self.is_jsontable_schema`` is set, the serialized data is wrapped
    in an envelope of the form ``{"schema": ..., "data": ...}``, where the
    schema part is ``self.schema`` encoded with the standard-library
    ``json`` module.

    Returns
    -------
    The serialized text produced by ``dumps``, optionally wrapped in the
    schema envelope described above.
    """
    options = dict(
        orient=self.orient,
        double_precision=self.double_precision,
        ensure_ascii=self.ensure_ascii,
        date_unit=self.date_unit,
        iso_dates=self.date_format == 'iso',
        default_handler=self.default_handler,
    )
    payload = dumps(self.obj, **options)

    # Plain orients are returned untouched.
    if not self.is_jsontable_schema:
        return payload

    # The data is already JSON text, so it is spliced into the envelope
    # verbatim rather than being encoded a second time.
    envelope = '{{"schema": {}, "data": {}}}'
    return envelope.format(json.dumps(self.schema), payload)
86
112
87
113
88
114
class SeriesWriter (Writer ):
@@ -884,3 +910,135 @@ def _recursive_extract(data, path, seen_meta, level=0):
884
910
result [k ] = np .array (v ).repeat (lengths )
885
911
886
912
return result
913
+
914
+
915
+ # ---------------------------------------------------------------------
916
+ # JSON-Table Schema routines
917
+ # http://specs.frictionlessdata.io/json-table-schema/
918
+
919
+
920
def as_jsontable_type(x):
    """
    Map a NumPy / pandas dtype to the name of a JSON Table Schema type.

    ===============  ======================
    Pandas type      JSON Table Schema type
    ===============  ======================
    int64            integer
    float64          number
    bool             boolean
    datetime64[ns]   date
    timedelta64[ns]  duration
    ===============  ======================

    A dtype accepted by ``is_string_dtype`` maps to ``'string'``; anything
    that matches none of the checks maps to ``'any'``.

    Parameters
    ----------
    x : dtype
        The dtype (or dtype-like object) to classify.

    Returns
    -------
    str
        One of ``'integer'``, ``'boolean'``, ``'number'``, ``'date'``,
        ``'duration'``, ``'string'`` or ``'any'``.
    """
    # Checks run top to bottom and the first hit wins, so the order of this
    # table is part of the behaviour (e.g. the boolean test precedes the
    # numeric one).
    ordered_checks = (
        (is_integer_dtype, 'integer'),
        (is_bool_dtype, 'boolean'),
        (is_numeric_dtype, 'number'),
        (is_datetime64_dtype, 'date'),
        (is_timedelta64_dtype, 'duration'),
        (is_string_dtype, 'string'),
    )
    for matches, label in ordered_checks:
        if matches(x):
            return label
    return 'any'
948
+
949
+
950
+ def _set_default_names (data ):
951
+ """Sets index names to 'index' for regular, or 'level_x' for Multi"""
952
+ if all (name is not None for name in data .index .names ):
953
+ return data
954
+
955
+ data = data .copy ()
956
+ if data .index .nlevels > 1 :
957
+ names = [name if name is not None else 'level_{}' .format (i )
958
+ for i , name in enumerate (data .index .names )]
959
+ data .index .names = names
960
+ else :
961
+ data .index .name = 'index'
962
+ return data
963
+
964
+
965
def to_json_schema(data, index=True, primary_key=None):
    """
    Create a JSON Table schema from ``data``.

    Parameters
    ----------
    data : Series, DataFrame
    index : bool
        Whether to include ``data.index`` in the schema. Unnamed index
        levels are given default names before the schema is built.
    primary_key : str, list of str, or None
        Column name(s) to designate as the primary key; stored in the
        schema unchanged. The default `None` will set `'primary_key'` to
        the index level or levels if ``index`` is true and the index is
        unique; otherwise no `'primary_key'` entry is written.

    Returns
    -------
    schema : dict
        Always has a ``'fields'`` list of ``{'name': ..., 'type': ...}``
        entries (index levels first, then columns / the Series values) and
        optionally a ``'primary_key'`` entry.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {'A': [1, 2, 3],
    ...      'B': ['a', 'b', 'c'],
    ...      'C': pd.date_range('2016-01-01', freq='d', periods=3),
    ...     }, index=pd.Index(range(3), name='idx'))
    >>> to_json_schema(df)
    {'fields': [{'name': 'idx', 'type': 'integer'},
                {'name': 'A', 'type': 'integer'},
                {'name': 'B', 'type': 'string'},
                {'name': 'C', 'type': 'date'}],
     'primary_key': 'idx'}

    Notes
    -----
    See `as_jsontable_type` for conversion types.
    Timedeltas are converted to ISO8601 duration format with
    9 decimal places after the seconds field for nanosecond precision.
    """
    # Use the same truthiness test here as for emitting the index fields
    # below; guarding this step with ``index is True`` let a truthy
    # non-``True`` value (e.g. ``1``) emit an index field named ``None``.
    if index:
        data = _set_default_names(data)

    schema = {}
    fields = []

    if index:
        if data.index.nlevels > 1:
            for level in data.index.levels:
                fields.append({'name': level.name,
                               'type': as_jsontable_type(level.dtype)})
        else:
            fields.append({'name': data.index.name,
                           'type': as_jsontable_type(data.index.dtype)})

    if data.ndim > 1:
        # Pair column labels with dtypes via ``zip`` instead of
        # ``Series.iteritems``, which newer pandas releases no longer have.
        for column, type_ in zip(data.columns, data.dtypes):
            fields.append({'name': column,
                           'type': as_jsontable_type(type_)})
    else:
        # A Series contributes a single field; unnamed Series use 'values'.
        fields.append({
            'name': data.name if data.name is not None else 'values',
            'type': as_jsontable_type(data.dtype)})

    schema['fields'] = fields
    if index and data.index.is_unique and primary_key is None:
        # TODO: Always a list, spec allows for a string scalar.
        if data.index.nlevels == 1:
            schema['primary_key'] = data.index.name
        else:
            schema['primary_key'] = data.index.names
    elif primary_key is not None:
        # An explicit key always wins over the inferred one.
        schema['primary_key'] = primary_key
    return schema
1037
+
1038
+
1039
def publish_tableschema(data):
    """
    Temporary helper for testing w/ frontend.

    Serializes ``data`` with ``to_json(orient='jsontable_schema')`` and hands
    the result to IPython's ``display`` as a raw bundle keyed by the
    ``application/vnd.tableschema.v1+json`` mimetype.

    Parameters
    ----------
    data : object exposing ``to_json``
        The object to publish.
    """
    # Imported lazily so IPython is only needed when this helper is called.
    from IPython.display import display

    bundle = {
        'application/vnd.tableschema.v1+json':
            data.to_json(orient='jsontable_schema'),
    }
    display(bundle, raw=True)
0 commit comments