Skip to content

Commit 32ae80c

Browse files
committed
Cleaned up code for table orient in read_json
1 parent 8e91151 commit 32ae80c

File tree

3 files changed

+160
-105
lines changed

3 files changed

+160
-105
lines changed

doc/source/whatsnew/v0.23.0.txt

+45-1
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,51 @@ Current Behavior
119119

120120
s.rank(na_option='top')
121121

122+
.. _whatsnew_0230.enhancements.round-trippable_json:
123+
124+
JSON read/write round-trippable with ``orient='table'``
125+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
126+
127+
A ``DataFrame`` can now be written to and subsequently read back via JSON while preserving metadata through usage of the ``orient='table'`` argument (see :issue:`18912` and :issue:`9146`). Previously, none of the available ``orient`` values guaranteed the preservation of dtypes and index names, amongst other metadata.
128+
129+
.. ipython:: python
130+
131+
df = pd.DataFrame({'foo': [1, 2, 3, 4],
132+
'bar': ['a', 'b', 'c', 'd'],
133+
'baz': pd.date_range('2018-01-01', freq='d', periods=4),
134+
'qux': pd.Categorical(['a', 'b', 'c', 'c'])
135+
}, index=pd.Index(range(4), name='idx'))
136+
df
137+
138+
Previous Behavior:
139+
140+
.. code-block:: ipython
141+
142+
In [17]: df.to_json("test.json", orient='columns')
143+
In [18]: pd.read_json("test.json", orient='columns')
144+
Out[18]:
145+
bar baz foo qux
146+
0 a 1514764800000 1 a
147+
1 b 1514851200000 2 b
148+
2 c 1514937600000 3 c
149+
3 d 1515024000000 4 c
150+
151+
Current Behavior:
152+
153+
.. code-block:: ipython
154+
155+
In [29]: df.to_json("test.json", orient='table')
156+
In [30]: pd.read_json("test.json", orient='table')
157+
Out[30]:
158+
bar baz foo qux
159+
idx
160+
0 a 2018-01-01 1 a
161+
1 b 2018-01-02 2 b
162+
2 c 2018-01-03 3 c
163+
3 d 2018-01-04 4 c
164+
165+
Please note that the string ``index`` is not supported as an index name with the round trip format, as it is used by default in ``to_json`` to indicate a missing index name.
166+
122167
.. _whatsnew_0230.enhancements.other:
123168

124169
Other Enhancements
@@ -145,7 +190,6 @@ Other Enhancements
145190
- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
146191
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).
147192
- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
148-
- :func:`read_json` now supports ``table`` as a value to the ``orient`` argument (:issue:`18912`)
149193

150194
.. _whatsnew_0230.api_breaking:
151195

pandas/io/json/table_schema.py

+52-13
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def set_default_names(data):
8080
return data
8181

8282

83-
def make_field(arr, dtype=None):
83+
def convert_pandas_type_to_json_field(arr, dtype=None):
8484
dtype = dtype or arr.dtype
8585
if arr.name is None:
8686
name = 'values'
@@ -108,8 +108,8 @@ def make_field(arr, dtype=None):
108108
return field
109109

110110

111-
def revert_field(field):
112-
'''
111+
def convert_json_field_to_pandas_type(field):
112+
"""
113113
Converts a JSON field descriptor into its corresponding NumPy / pandas type
114114
115115
Parameters
@@ -120,9 +120,35 @@ def revert_field(field):
120120
Returns
121121
-------
122122
dtype
123-
'''
123+
124+
Raises
125+
------
126+
ValueError
127+
If the type of the provided field is unknown or currently unsupported
128+
129+
Examples
130+
--------
131+
>>> convert_json_field_to_pandas_type({'name': 'an_int',
132+
'type': 'integer'})
133+
'int64'
134+
>>> convert_json_field_to_pandas_type({'name': 'a_categorical',
135+
'type': 'any',
136+
'constraints': {'enum': [
137+
'a', 'b', 'c']},
138+
'ordered': True})
139+
CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)
140+
>>> convert_json_field_to_pandas_type({'name': 'a_datetime',
141+
'type': 'datetime'})
142+
'datetime64[ns]'
143+
>>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz',
144+
'type': 'datetime',
145+
'tz': 'US/Central'})
146+
'datetime64[ns, US/Central]'
147+
"""
124148
typ = field['type']
125-
if typ == 'integer':
149+
if typ == 'string':
150+
return 'object'
151+
elif typ == 'integer':
126152
return 'int64'
127153
elif typ == 'number':
128154
return 'float64'
@@ -139,7 +165,10 @@ def revert_field(field):
139165
if 'constraints' in field and 'ordered' in field:
140166
return CategoricalDtype(categories=field['constraints']['enum'],
141167
ordered=field['ordered'])
142-
return 'object'
168+
else:
169+
return 'object'
170+
171+
raise ValueError("Unsupported or invalid field type: {}".format(typ))
143172

144173

145174
def build_table_schema(data, index=True, primary_key=None, version=True):
@@ -197,15 +226,15 @@ def build_table_schema(data, index=True, primary_key=None, version=True):
197226
if index:
198227
if data.index.nlevels > 1:
199228
for level in data.index.levels:
200-
fields.append(make_field(level))
229+
fields.append(convert_pandas_type_to_json_field(level))
201230
else:
202-
fields.append(make_field(data.index))
231+
fields.append(convert_pandas_type_to_json_field(data.index))
203232

204233
if data.ndim > 1:
205234
for column, s in data.iteritems():
206-
fields.append(make_field(s))
235+
fields.append(convert_pandas_type_to_json_field(s))
207236
else:
208-
fields.append(make_field(data))
237+
fields.append(convert_pandas_type_to_json_field(data))
209238

210239
schema['fields'] = fields
211240
if index and data.index.is_unique and primary_key is None:
@@ -242,6 +271,13 @@ def parse_table_schema(json, precise_float):
242271
NotImplementedError
243272
If the JSON table schema contains either timezone or timedelta data
244273
274+
Notes
275+
-----
276+
Because ``to_json`` uses the string ``index`` to denote a name-less
277+
``Index``, this function sets the index name of the returned
278+
``DataFrame`` to ``None`` when said string is encountered. Therefore,
279+
intentional usage of ``index`` as the ``Index`` name is not supported.
280+
245281
See also
246282
--------
247283
build_table_schema : inverse function
@@ -251,7 +287,7 @@ def parse_table_schema(json, precise_float):
251287
col_order = [field['name'] for field in table['schema']['fields']]
252288
df = DataFrame(table['data'])[col_order]
253289

254-
dtypes = {field['name']: revert_field(field)
290+
dtypes = {field['name']: convert_json_field_to_pandas_type(field)
255291
for field in table['schema']['fields']}
256292

257293
# Cannot directly use astype with timezone data on object; raise for now
@@ -267,7 +303,10 @@ def parse_table_schema(json, precise_float):
267303
df = df.astype(dtypes)
268304

269305
df = df.set_index(table['schema']['primaryKey'])
270-
if all(x.startswith('level_') for x in df.index.names):
271-
df.index.names = [None] * len(df.index.names)
306+
if len(df.index.names) == 1 and df.index.name == 'index':
307+
df.index.name = None
308+
else:
309+
if all(x.startswith('level_') for x in df.index.names):
310+
df.index.names = [None] * len(df.index.names)
272311

273312
return df

0 commit comments

Comments
 (0)