Skip to content

fix unnecessary sort in pd.read_json and orient="index" #28606

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Oct 9, 2019
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ I/O
- Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`)
- Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`)
- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with `engine='fastparquet'` if the file did not already exist (:issue:`28326`)
- Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`)
- Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`)

Plotting
Expand Down
16 changes: 7 additions & 9 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from pandas.core.dtypes.common import ensure_str, is_period_dtype

from pandas import DataFrame, MultiIndex, Series, isna, to_datetime
from pandas import DataFrame, MultiIndex, Series, compat, isna, to_datetime
from pandas._typing import Scalar
from pandas.core.reshape.concat import concat

Expand Down Expand Up @@ -1112,15 +1112,13 @@ def _parse_no_numpy(self):
self.check_keys_split(decoded)
self.obj = DataFrame(dtype=None, **decoded)
elif orient == "index":
self.obj = (
DataFrame.from_dict(
loads(json, precise_float=self.precise_float),
dtype=None,
orient="index",
)
.sort_index(axis="columns")
.sort_index(axis="index")
self.obj = DataFrame.from_dict(
loads(json, precise_float=self.precise_float),
dtype=None,
orient="index",
)
if compat.PY35:
self.obj = self.obj.sort_index(axis="columns").sort_index(axis="index")
elif orient == "table":
self.obj = parse_table_schema(json, precise_float=self.precise_float)
else:
Expand Down
7 changes: 3 additions & 4 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,7 @@ def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype):

expected = self.frame.copy()

if not numpy and (orient == "index" or (PY35 and orient == "columns")):
# TODO: debug why sort is required
if not numpy and PY35 and orient in ("index", "columns"):
expected = expected.sort_index()

assert_json_roundtrip_equal(result, expected, orient)
Expand All @@ -181,7 +180,7 @@ def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype):
data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype
)
expected = self.intframe.copy()
if not numpy and (orient == "index" or (PY35 and orient == "columns")):
if not numpy and PY35 and orient in ("index", "columns"):
expected = expected.sort_index()

if (
Expand Down Expand Up @@ -216,7 +215,7 @@ def test_roundtrip_str_axes(self, orient, convert_axes, numpy, dtype):
)

expected = df.copy()
if not numpy and (orient == "index" or (PY35 and orient == "columns")):
if not numpy and PY35 and orient in ("index", "columns"):
expected = expected.sort_index()

if not dtype:
Expand Down