Skip to content

Commit 09bc53a

Browse files
committed
Cleaned up code for table orient in read_json
1 parent fc893bd commit 09bc53a

File tree

2 files changed

+99
-102
lines changed

2 files changed

+99
-102
lines changed

pandas/io/json/table_schema.py

+40-11
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def set_default_names(data):
8080
return data
8181

8282

83-
def make_field(arr, dtype=None):
83+
def convert_pandas_type_to_json_field(arr, dtype=None):
8484
dtype = dtype or arr.dtype
8585
if arr.name is None:
8686
name = 'values'
@@ -108,8 +108,8 @@ def make_field(arr, dtype=None):
108108
return field
109109

110110

111-
def revert_field(field):
112-
'''
111+
def convert_json_field_to_pandas_type(field):
112+
"""
113113
Converts a JSON field descriptor into its corresponding NumPy / pandas type
114114
115115
Parameters
@@ -120,9 +120,35 @@ def revert_field(field):
120120
Returns
121121
-------
122122
dtype
123-
'''
123+
124+
Raises
125+
------
126+
ValueError
127+
If the type of the provided field is unknown or currently unsupported
128+
129+
Examples
130+
--------
131+
>>> convert_json_field_to_pandas_type({'name': 'an_int',
132+
'type': 'integer'})
133+
'int64'
134+
>>> convert_json_field_to_pandas_type({'name': 'a_categorical',
135+
'type': 'any',
136+
'constraints': {'enum': [
137+
'a', 'b', 'c']},
138+
'ordered': True})
139+
CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)
140+
>>> convert_json_field_to_pandas_type({'name': 'a_datetime',
141+
'type': 'datetime'})
142+
'datetime64[ns]'
143+
>>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz',
144+
'type': 'datetime',
145+
'tz': 'US/Central'})
146+
'datetime64[ns, US/Central]'
147+
"""
124148
typ = field['type']
125-
if typ == 'integer':
149+
if typ == 'string':
150+
return 'object'
151+
elif typ == 'integer':
126152
return 'int64'
127153
elif typ == 'number':
128154
return 'float64'
@@ -139,7 +165,10 @@ def revert_field(field):
139165
if 'constraints' in field and 'ordered' in field:
140166
return CategoricalDtype(categories=field['constraints']['enum'],
141167
ordered=field['ordered'])
142-
return 'object'
168+
else:
169+
return 'object'
170+
171+
raise ValueError("Unsupported or invalid field type: {}".format(typ))
143172

144173

145174
def build_table_schema(data, index=True, primary_key=None, version=True):
@@ -197,15 +226,15 @@ def build_table_schema(data, index=True, primary_key=None, version=True):
197226
if index:
198227
if data.index.nlevels > 1:
199228
for level in data.index.levels:
200-
fields.append(make_field(level))
229+
fields.append(convert_pandas_type_to_json_field(level))
201230
else:
202-
fields.append(make_field(data.index))
231+
fields.append(convert_pandas_type_to_json_field(data.index))
203232

204233
if data.ndim > 1:
205234
for column, s in data.iteritems():
206-
fields.append(make_field(s))
235+
fields.append(convert_pandas_type_to_json_field(s))
207236
else:
208-
fields.append(make_field(data))
237+
fields.append(convert_pandas_type_to_json_field(data))
209238

210239
schema['fields'] = fields
211240
if index and data.index.is_unique and primary_key is None:
@@ -251,7 +280,7 @@ def parse_table_schema(json, precise_float):
251280
col_order = [field['name'] for field in table['schema']['fields']]
252281
df = DataFrame(table['data'])[col_order]
253282

254-
dtypes = {field['name']: revert_field(field)
283+
dtypes = {field['name']: convert_json_field_to_pandas_type(field)
255284
for field in table['schema']['fields']}
256285

257286
# Cannot directly use astype with timezone data on object; raise for now

pandas/tests/io/json/test_json_table_schema.py

+59-91
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from pandas.io.json.table_schema import (
1313
as_json_table_type,
1414
build_table_schema,
15-
make_field,
15+
convert_pandas_type_to_json_field,
16+
convert_json_field_to_pandas_type,
1617
set_default_names)
1718
import pandas.util.testing as tm
1819

@@ -335,62 +336,89 @@ def test_date_format_raises(self):
335336
self.df.to_json(orient='table', date_format='iso')
336337
self.df.to_json(orient='table')
337338

338-
def test_make_field_int(self):
339+
def test_convert_pandas_type_to_json_field_int(self):
339340
data = [1, 2, 3]
340341
kinds = [pd.Series(data, name='name'), pd.Index(data, name='name')]
341342
for kind in kinds:
342-
result = make_field(kind)
343+
result = convert_pandas_type_to_json_field(kind)
343344
expected = {"name": "name", "type": 'integer'}
344345
assert result == expected
345346

346-
def test_make_field_float(self):
347+
def test_convert_pandas_type_to_json_field_float(self):
347348
data = [1., 2., 3.]
348349
kinds = [pd.Series(data, name='name'), pd.Index(data, name='name')]
349350
for kind in kinds:
350-
result = make_field(kind)
351+
result = convert_pandas_type_to_json_field(kind)
351352
expected = {"name": "name", "type": 'number'}
352353
assert result == expected
353354

354-
def test_make_field_datetime(self):
355+
def test_convert_pandas_type_to_json_field_datetime(self):
355356
data = [1., 2., 3.]
356357
kinds = [pd.Series(pd.to_datetime(data), name='values'),
357358
pd.to_datetime(data)]
358359
for kind in kinds:
359-
result = make_field(kind)
360+
result = convert_pandas_type_to_json_field(kind)
360361
expected = {"name": "values", "type": 'datetime'}
361362
assert result == expected
362363

363364
kinds = [pd.Series(pd.to_datetime(data, utc=True), name='values'),
364365
pd.to_datetime(data, utc=True)]
365366
for kind in kinds:
366-
result = make_field(kind)
367+
result = convert_pandas_type_to_json_field(kind)
367368
expected = {"name": "values", "type": 'datetime', "tz": "UTC"}
368369
assert result == expected
369370

370371
arr = pd.period_range('2016', freq='A-DEC', periods=4)
371-
result = make_field(arr)
372+
result = convert_pandas_type_to_json_field(arr)
372373
expected = {"name": "values", "type": 'datetime', "freq": "A-DEC"}
373374
assert result == expected
374375

375-
def test_make_field_categorical(self):
376+
def test_convert_pandas_type_to_json_field_categorical(self):
376377
data = ['a', 'b', 'c']
377378
ordereds = [True, False]
378379

379380
for ordered in ordereds:
380381
arr = pd.Series(pd.Categorical(data, ordered=ordered), name='cats')
381-
result = make_field(arr)
382+
result = convert_pandas_type_to_json_field(arr)
382383
expected = {"name": "cats", "type": "any",
383384
"constraints": {"enum": data},
384385
"ordered": ordered}
385386
assert result == expected
386387

387388
arr = pd.CategoricalIndex(data, ordered=ordered, name='cats')
388-
result = make_field(arr)
389+
result = convert_pandas_type_to_json_field(arr)
389390
expected = {"name": "cats", "type": "any",
390391
"constraints": {"enum": data},
391392
"ordered": ordered}
392393
assert result == expected
393394

395+
@pytest.mark.parametrize("inp,exp", [
396+
({'type': 'integer'}, 'int64'),
397+
({'type': 'number'}, 'float64'),
398+
({'type': 'boolean'}, 'bool'),
399+
({'type': 'duration'}, 'timedelta64'),
400+
({'type': 'datetime'}, 'datetime64[ns]'),
401+
({'type': 'datetime', 'tz': 'US/Hawaii'}, 'datetime64[ns, US/Hawaii]'),
402+
({'type': 'any'}, 'object'),
403+
({'type': 'any', 'constraints': {'enum': ['a', 'b', 'c']},
404+
'ordered': False}, CategoricalDtype(categories=['a', 'b', 'c'],
405+
ordered=False)),
406+
({'type': 'any', 'constraints': {'enum': ['a', 'b', 'c']},
407+
'ordered': True}, CategoricalDtype(categories=['a', 'b', 'c'],
408+
ordered=True)),
409+
({'type': 'string'}, 'object')])
410+
def test_convert_json_field_to_pandas_type(self, inp, exp):
411+
field = {'name': 'foo'}
412+
field.update(inp)
413+
assert convert_json_field_to_pandas_type(field) == exp
414+
415+
@pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
416+
def test_convert_json_field_to_pandas_type_raises(self, inp):
417+
field = {'type': inp}
418+
with tm.assert_raises_regex(ValueError, "Unsupported or invalid field "
419+
"type: {}".format(inp)):
420+
convert_json_field_to_pandas_type(field)
421+
394422
def test_categorical(self):
395423
s = pd.Series(pd.Categorical(['a', 'b', 'a']))
396424
s.index.name = 'idx'
@@ -476,92 +504,32 @@ def test_mi_falsey_name(self):
476504

477505
class TestTableOrientReader(object):
478506

479-
def test_integer(self):
480-
df = DataFrame(
481-
{'A': [1, 2, 3, 4],
482-
},
483-
index=pd.Index(range(4), name='idx'))
484-
out = df.to_json(orient="table")
485-
result = pd.read_json(out, orient="table")
486-
tm.assert_frame_equal(df, result)
487-
488-
def test_object(self):
489-
df = DataFrame(
490-
{'B': ['a', 'b', 'c', 'c'],
491-
},
492-
index=pd.Index(range(4), name='idx'))
493-
out = df.to_json(orient="table")
494-
result = pd.read_json(out, orient="table")
495-
tm.assert_frame_equal(df, result)
496-
497-
def test_date_range(self):
498-
df = DataFrame(
499-
{'C': pd.date_range('2016-01-01', freq='d', periods=4),
500-
},
501-
index=pd.Index(range(4), name='idx'))
502-
503-
out = df.to_json(orient="table")
504-
result = pd.read_json(out, orient="table")
505-
tm.assert_frame_equal(df, result)
506-
507-
def test_timedelta_raises(self):
508-
df = DataFrame(
509-
{'D': pd.timedelta_range('1H', periods=4, freq='T'),
510-
},
511-
index=pd.Index(range(4), name='idx'))
512-
513-
out = df.to_json(orient="table")
514-
with tm.assert_raises_regex(NotImplementedError, 'can not yet read '
515-
'ISO-formatted Timedelta data'):
516-
pd.read_json(out, orient="table")
517-
518-
def test_categorical(self):
519-
df = DataFrame(
520-
{'E': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
521-
'F': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
522-
ordered=True)),
523-
},
524-
index=pd.Index(range(4), name='idx'))
525-
507+
@pytest.mark.parametrize("vals", [
508+
{'ints': [1, 2, 3, 4]},
509+
{'objects': ['a', 'b', 'c', 'd']},
510+
{'date_ranges': pd.date_range('2016-01-01', freq='d', periods=4)},
511+
{'categoricals': pd.Series(pd.Categorical(['a', 'b', 'c', 'c']))},
512+
{'ordered_cats': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
513+
ordered=True))},
514+
pytest.param({'floats': [1., 2., 3., 4.]}, marks=pytest.mark.xfail),
515+
{'floats': [1.1, 2.2, 3.3, 4.4]},
516+
{'bools': [True, False, False, True]}])
517+
def test_read_json_table_orient(self, vals):
518+
df = DataFrame(vals, index=pd.Index(range(4), name='idx'))
526519
out = df.to_json(orient="table")
527520
result = pd.read_json(out, orient="table")
528521
tm.assert_frame_equal(df, result)
529522

530-
@pytest.mark.parametrize("float_vals", [
531-
pytest.param([1., 2., 3., 4.], marks=pytest.mark.xfail),
532-
[1.1, 2.2, 3.3, 4.4]])
533-
def test_float(self, float_vals):
534-
df = DataFrame(
535-
{'G': float_vals,
536-
},
537-
index=pd.Index(range(4), name='idx'))
538-
523+
@pytest.mark.parametrize("vals", [
524+
{'timedeltas': pd.timedelta_range('1H', periods=4, freq='T')},
525+
{'timezones': pd.date_range('2016-01-01', freq='d', periods=4,
526+
tz='US/Central')}])
527+
def test_read_json_table_orient_raises(self, vals):
528+
df = DataFrame(vals, index=pd.Index(range(4), name='idx'))
539529
out = df.to_json(orient="table")
540-
result = pd.read_json(out, orient="table", convert_axes=False)
541-
tm.assert_frame_equal(df, result)
542-
543-
def test_timezone_raises(self):
544-
df = DataFrame(
545-
{'H': pd.date_range('2016-01-01', freq='d', periods=4,
546-
tz='US/Central'),
547-
},
548-
index=pd.Index(range(4), name='idx'))
549-
550-
out = df.to_json(orient="table")
551-
with tm.assert_raises_regex(NotImplementedError, 'can not yet read '
552-
'timezone data'):
530+
with tm.assert_raises_regex(NotImplementedError, 'can not yet read '):
553531
pd.read_json(out, orient="table")
554532

555-
def test_bool(self):
556-
df = DataFrame(
557-
{'I': [True, False, False, True],
558-
},
559-
index=pd.Index(range(4), name='idx'))
560-
561-
out = df.to_json(orient="table")
562-
result = pd.read_json(out, orient="table")
563-
tm.assert_frame_equal(df, result)
564-
565533
def test_comprehensive(self):
566534
df = DataFrame(
567535
{'A': [1, 2, 3, 4],

0 commit comments

Comments
 (0)