Skip to content

Commit 4319335

Browse files
committed
read_json support for orient="table"
1 parent 04beec7 commit 4319335

File tree

3 files changed

+187
-1
lines changed

3 files changed

+187
-1
lines changed

pandas/io/json/json.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from pandas.core.reshape.concat import concat
1717
from pandas.io.formats.printing import pprint_thing
1818
from .normalize import _convert_to_line_delimits
19-
from .table_schema import build_table_schema
19+
from .table_schema import build_table_schema, parse_table_schema
2020
from pandas.core.dtypes.common import is_period_dtype
2121

2222
# Short module-level alias for the decoder; the parsing code below calls it
# as ``loads(json, precise_float=...)``.
loads = json.loads
@@ -839,6 +839,9 @@ def _parse_no_numpy(self):
839839
elif orient == "index":
840840
self.obj = DataFrame(
841841
loads(json, precise_float=self.precise_float), dtype=None).T
842+
elif orient == 'table':
843+
self.obj = parse_table_schema(json,
844+
precise_float=self.precise_float)
842845
else:
843846
self.obj = DataFrame(
844847
loads(json, precise_float=self.precise_float), dtype=None)

pandas/io/json/table_schema.py

+59
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,20 @@
33
44
http://specs.frictionlessdata.io/json-table-schema/
55
"""
6+
from collections import OrderedDict
7+
8+
import pandas._libs.json as json
9+
from pandas import DataFrame
10+
from pandas.api.types import CategoricalDtype
611
from pandas.core.common import _all_not_none
712
from pandas.core.dtypes.common import (
813
is_integer_dtype, is_timedelta64_dtype, is_numeric_dtype,
914
is_bool_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
1015
is_categorical_dtype, is_period_dtype, is_string_dtype
1116
)
1217

18+
# Module-level alias for the decoder of ``pandas._libs.json`` (imported above
# as ``json``), so it can be called as plain ``loads(...)``.
loads = json.loads
19+
1320

1421
def as_json_table_type(x):
1522
"""
@@ -103,6 +110,28 @@ def make_field(arr, dtype=None):
103110
return field
104111

105112

113+
def revert_field(field):
    """
    Map a Table Schema field descriptor to a pandas dtype.

    Parameters
    ----------
    field : dict
        Field descriptor. ``field['type']`` is required; ``'tz'``,
        ``'constraints'`` and ``'ordered'`` are consulted when present.

    Returns
    -------
    str or CategoricalDtype
        A dtype string, or a ``CategoricalDtype`` built from
        ``field['constraints']['enum']`` and ``field['ordered']`` when the
        type is ``'any'`` and both the ``'constraints'`` and ``'ordered'``
        keys exist. Any type not handled explicitly maps to ``'object'``.
    """
    kind = field['type']

    # Types whose dtype does not depend on anything else in the descriptor.
    fixed = (
        ('integer', 'int64'),
        ('number', 'float64'),
        ('boolean', 'bool'),
        ('duration', 'timedelta64'),
    )
    for schema_type, dtype in fixed:
        if kind == schema_type:
            return dtype

    if kind == 'datetime':
        zone = field.get('tz')
        if not zone:
            return 'datetime64[ns]'
        return 'datetime64[ns, {tz}]'.format(tz=zone)

    is_categorical = (kind == 'any' and 'constraints' in field and
                      'ordered' in field)
    if not is_categorical:
        return 'object'

    return CategoricalDtype(categories=field['constraints']['enum'],
                            ordered=field['ordered'])
133+
134+
106135
def build_table_schema(data, index=True, primary_key=None, version=True):
107136
"""
108137
Create a Table schema from ``data``.
@@ -180,3 +209,33 @@ def build_table_schema(data, index=True, primary_key=None, version=True):
180209
if version:
181210
schema['pandas_version'] = '0.20.0'
182211
return schema
212+
213+
214+
def parse_table_schema(json, precise_float):
    """
    Build a DataFrame from a JSON document written with ``orient="table"``.

    Parameters
    ----------
    json : str
        JSON text holding a ``schema`` entry (with ``fields`` and,
        optionally, ``primaryKey``) and a ``data`` entry with the records.
    precise_float : bool
        Forwarded unchanged to the JSON decoder.

    Returns
    -------
    DataFrame
        Columns are cast to the dtypes derived from the schema fields. When
        the schema has a ``primaryKey`` those columns become the index;
        otherwise the default index is kept.

    Raises
    ------
    NotImplementedError
        If the schema declares a timezone-aware datetime field or a
        duration field; neither can be read back yet.
    """
    table = loads(json, precise_float=precise_float)
    schema = table['schema']
    fields = schema['fields']

    # Take the columns from the schema rather than from the decoded records:
    # the declared order is kept, and an empty ``data`` list still produces
    # the columns that ``astype`` and ``set_index`` need below.
    col_order = [field['name'] for field in fields]
    df = DataFrame(table['data'], columns=col_order)

    dtypes = {field['name']: revert_field(field) for field in fields}

    # Cannot directly use astype with timezone data on object; raise for now
    if any(str(x).startswith('datetime64[ns, ') for x in dtypes.values()):
        raise NotImplementedError('orient="table" can not yet read timezone '
                                  'data')

    # No ISO constructor for Timedelta as of yet, so need to raise
    if 'timedelta64' in dtypes.values():
        raise NotImplementedError('orient="table" can not yet read '
                                  'ISO-formatted Timedelta data')

    df = df.astype(dtypes)

    # A schema without a primary key has no index columns to restore.
    if 'primaryKey' in schema:
        df = df.set_index(schema['primaryKey'])
        # 'level_N' names are treated as placeholders for unnamed levels and
        # are mapped back to None one level at a time, so a partially named
        # MultiIndex is restored too. The hasattr guard keeps non-string
        # names (e.g. integers) from raising.
        # NOTE(review): assumes the writer only emits 'level_N' for levels
        # that had no name -- confirm against set_default_names.
        df.index.names = [
            None if hasattr(x, 'startswith') and x.startswith('level_')
            else x
            for x in df.index.names]

    return df

pandas/tests/io/json/test_json_table_schema.py

+124
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
build_table_schema,
1515
make_field,
1616
set_default_names)
17+
import pandas.util.testing as tm
1718

1819

1920
class TestBuildSchema(object):
@@ -471,3 +472,126 @@ def test_mi_falsey_name(self):
471472
('a', 'b')]))
472473
result = [x['name'] for x in build_table_schema(df)['fields']]
473474
assert result == ['level_0', 'level_1', 0, 1, 2, 3]
475+
476+
477+
class TestTableOrientReader(object):
    """
    Checks that frames written with ``to_json(orient="table")`` can be read
    back with ``read_json(orient="table")``, or fail with the expected error.
    """

    @staticmethod
    def _frame(columns):
        # Every single-index case shares the same four-row index named 'idx'.
        return DataFrame(columns, index=pd.Index(range(4), name='idx'))

    @staticmethod
    def _check_roundtrip(expected, **read_kwargs):
        # Serialise, read back and compare with the frame we started from.
        payload = expected.to_json(orient="table")
        actual = pd.read_json(payload, orient="table", **read_kwargs)
        tm.assert_frame_equal(expected, actual)

    @staticmethod
    def _check_unsupported(frame, message):
        # Writing succeeds; only the read is expected to raise.
        payload = frame.to_json(orient="table")
        with tm.assert_raises_regex(NotImplementedError, message):
            pd.read_json(payload, orient="table")

    def test_integer(self):
        self._check_roundtrip(self._frame({'A': [1, 2, 3, 4]}))

    def test_object(self):
        self._check_roundtrip(self._frame({'B': ['a', 'b', 'c', 'c']}))

    def test_date_range(self):
        dates = pd.date_range('2016-01-01', freq='d', periods=4)
        self._check_roundtrip(self._frame({'C': dates}))

    def test_timedelta_raises(self):
        deltas = pd.timedelta_range('1H', periods=4, freq='T')
        self._check_unsupported(
            self._frame({'D': deltas}),
            'can not yet read ISO-formatted Timedelta data')

    def test_categorical(self):
        labels = ['a', 'b', 'c', 'c']
        columns = {
            'E': pd.Series(pd.Categorical(labels)),
            'F': pd.Series(pd.Categorical(labels, ordered=True)),
        }
        self._check_roundtrip(self._frame(columns))

    @pytest.mark.parametrize("float_vals", [
        pytest.param([1., 2., 3., 4.], marks=pytest.mark.xfail),
        [1.1, 2.2, 3.3, 4.4]])
    def test_float(self, float_vals):
        self._check_roundtrip(self._frame({'G': float_vals}),
                              convert_axes=False)

    def test_timezone_raises(self):
        aware = pd.date_range('2016-01-01', freq='d', periods=4,
                              tz='US/Central')
        self._check_unsupported(
            self._frame({'H': aware}),
            'can not yet read timezone data')

    def test_bool(self):
        self._check_roundtrip(self._frame({'I': [True, False, False, True]}))

    def test_comprehensive(self):
        # Columns 'D' (timedelta) and 'H' (tz-aware datetime) are left out on
        # purpose: reading them raises, which the *_raises tests cover.
        labels = ['a', 'b', 'c', 'c']
        columns = {
            'A': [1, 2, 3, 4],
            'B': ['a', 'b', 'c', 'c'],
            'C': pd.date_range('2016-01-01', freq='d', periods=4),
            'E': pd.Series(pd.Categorical(labels)),
            'F': pd.Series(pd.Categorical(labels, ordered=True)),
            'G': [1.1, 2.2, 3.3, 4.4],
            'I': [True, False, False, True],
        }
        self._check_roundtrip(self._frame(columns))

    @pytest.mark.parametrize("index_names", [[None, None], ['foo', 'bar']])
    def test_multiindex(self, index_names):
        # GH 18912
        rows = [["Arr", "alpha", [1, 2, 3, 4]],
                ["Bee", "Beta", [10, 20, 30, 40]]]
        df = pd.DataFrame(
            rows,
            index=[["A", "B"], ["Null", "Eins"]],
            columns=["Aussprache", "Griechisch", "Args"]
        )
        df.index.names = index_names
        self._check_roundtrip(df)

0 commit comments

Comments
 (0)