Skip to content

Commit 7806c2c

Browse files
mroeschke authored and pmhatre1 committed
BUG: read_json returning Index instead of RangeIndex (pandas-dev#57439)
* BUG: read_json returning Index instead of RangeIndex
* Keep track of conversion
1 parent 9ecb1da commit 7806c2c

File tree

3 files changed

+29
-13
lines changed

3 files changed

+29
-13
lines changed

doc/source/whatsnew/v2.2.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`)
1919
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
2020
- Fixed regression in :func:`pandas.testing.assert_series_equal` defaulting to ``check_exact=True`` when checking the :class:`Index` (:issue:`57067`)
21+
- Fixed regression in :func:`read_json` where an :class:`Index` would be returned instead of a :class:`RangeIndex` (:issue:`57429`)
2122
- Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
2223
- Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
2324
- Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)

pandas/io/json/_json.py

+14 -11
Original file line number | Diff line number | Diff line change
@@ -1211,23 +1211,25 @@ def _try_convert_data(
12111211
if result:
12121212
return new_data, True
12131213

1214+
converted = False
12141215
if self.dtype_backend is not lib.no_default and not is_axis:
12151216
# Fall through for conversion later on
12161217
return data, True
12171218
elif is_string_dtype(data.dtype):
12181219
# try float
12191220
try:
12201221
data = data.astype("float64")
1222+
converted = True
12211223
except (TypeError, ValueError):
12221224
pass
12231225

1224-
if data.dtype.kind == "f":
1225-
if data.dtype != "float64":
1226-
# coerce floats to 64
1227-
try:
1228-
data = data.astype("float64")
1229-
except (TypeError, ValueError):
1230-
pass
1226+
if data.dtype.kind == "f" and data.dtype != "float64":
1227+
# coerce floats to 64
1228+
try:
1229+
data = data.astype("float64")
1230+
converted = True
1231+
except (TypeError, ValueError):
1232+
pass
12311233

12321234
# don't coerce 0-len data
12331235
if len(data) and data.dtype in ("float", "object"):
@@ -1236,14 +1238,15 @@ def _try_convert_data(
12361238
new_data = data.astype("int64")
12371239
if (new_data == data).all():
12381240
data = new_data
1241+
converted = True
12391242
except (TypeError, ValueError, OverflowError):
12401243
pass
12411244

1242-
# coerce ints to 64
1243-
if data.dtype == "int":
1244-
# coerce floats to 64
1245+
if data.dtype == "int" and data.dtype != "int64":
1246+
# coerce ints to 64
12451247
try:
12461248
data = data.astype("int64")
1249+
converted = True
12471250
except (TypeError, ValueError):
12481251
pass
12491252

@@ -1252,7 +1255,7 @@ def _try_convert_data(
12521255
if self.orient == "split":
12531256
return data, False
12541257

1255-
return data, True
1258+
return data, converted
12561259

12571260
@final
12581261
def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:

pandas/tests/io/json/test_pandas.py

+14 -2
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
DataFrame,
2222
DatetimeIndex,
2323
Index,
24+
RangeIndex,
2425
Series,
2526
Timestamp,
2627
date_range,
@@ -467,12 +468,12 @@ def test_frame_mixedtype_orient(self): # GH10289
467468
left = read_json(inp, orient=orient, convert_axes=False)
468469
tm.assert_frame_equal(left, right)
469470

470-
right.index = pd.RangeIndex(len(df))
471+
right.index = RangeIndex(len(df))
471472
inp = StringIO(df.to_json(orient="records"))
472473
left = read_json(inp, orient="records", convert_axes=False)
473474
tm.assert_frame_equal(left, right)
474475

475-
right.columns = pd.RangeIndex(df.shape[1])
476+
right.columns = RangeIndex(df.shape[1])
476477
inp = StringIO(df.to_json(orient="values"))
477478
left = read_json(inp, orient="values", convert_axes=False)
478479
tm.assert_frame_equal(left, right)
@@ -2139,3 +2140,14 @@ def test_to_json_ea_null():
21392140
{"a":null,"b":null}
21402141
"""
21412142
assert result == expected
2143+
2144+
2145+
def test_read_json_lines_rangeindex():
2146+
# GH 57429
2147+
data = """
2148+
{"a": 1, "b": 2}
2149+
{"a": 3, "b": 4}
2150+
"""
2151+
result = read_json(StringIO(data), lines=True).index
2152+
expected = RangeIndex(2)
2153+
tm.assert_index_equal(result, expected, exact=True)

0 commit comments

Comments
 (0)