diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b4b98ec0403a8..9d63ddc6f2056 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -274,6 +274,7 @@ I/O - Bug in :func:`json_normalize` resulting in the first element of a generator object not being included in the returned ``DataFrame`` (:issue:`35923`) - Bug in :func:`read_excel` forward filling :class:`MultiIndex` names with multiple header and index columns specified (:issue:`34673`) - :func:`pandas.read_excel` now respects :func:``pandas.set_option`` (:issue:`34252`) +- Bug in :func:`read_json` when ``orient="split"`` does not maintain numeric string index (:issue:`28556`) Period ^^^^^^ diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index dd1c012252683..0791599dad201 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -894,14 +894,11 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): if result: return new_data, True - result = False - if data.dtype == "object": # try float try: data = data.astype("float64") - result = True except (TypeError, ValueError): pass @@ -912,7 +909,6 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): # coerce floats to 64 try: data = data.astype("float64") - result = True except (TypeError, ValueError): pass @@ -924,7 +920,6 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): new_data = data.astype("int64") if (new_data == data).all(): data = new_data - result = True except (TypeError, ValueError, OverflowError): pass @@ -934,11 +929,15 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): # coerce floats to 64 try: data = data.astype("int64") - result = True except (TypeError, ValueError): pass - return data, result + # if we have an index, we want to preserve dtypes + if name == "index" and len(data): + if self.orient == "split": + return data, False + + return data, True 
def _try_convert_to_date(self, data): """ diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index eaf35c845ab9a..dba3cb4db3ab8 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -193,12 +193,13 @@ def test_roundtrip_str_axes(self, request, orient, convert_axes, numpy, dtype): # JSON objects. JSON keys are by definition strings, so there's no way # to disambiguate whether those keys actually were strings or numeric # beforehand and numeric wins out. - # TODO: Split should be able to support this - if convert_axes and (orient in ("split", "index", "columns")): + if convert_axes and (orient in ("index", "columns")): expected.columns = expected.columns.astype(np.int64) expected.index = expected.index.astype(np.int64) elif orient == "records" and convert_axes: expected.columns = expected.columns.astype(np.int64) + elif convert_axes and orient == "split": + expected.columns = expected.columns.astype(np.int64) assert_json_roundtrip_equal(result, expected, orient)