Skip to content

Commit b5d5f9f

Browse files
mroeschke authored and meeseeksmachine committed
Backport PR pandas-dev#57439: BUG: read_json returning Index instead of RangeIndex
1 parent c101d30 commit b5d5f9f

File tree

3 files changed

+29
-13
lines changed

3 files changed

+29
-13
lines changed

doc/source/whatsnew/v2.2.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`)
1919
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
2020
- Fixed regression in :func:`pandas.testing.assert_series_equal` defaulting to ``check_exact=True`` when checking the :class:`Index` (:issue:`57067`)
21+
- Fixed regression in :func:`read_json` where an :class:`Index` would be returned instead of a :class:`RangeIndex` (:issue:`57429`)
2122
- Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
2223
- Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
2324
- Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)

pandas/io/json/_json.py

+14 -11
Original file line number | Diff line number | Diff line change
@@ -1266,23 +1266,25 @@ def _try_convert_data(
12661266
if result:
12671267
return new_data, True
12681268

1269+
converted = False
12691270
if self.dtype_backend is not lib.no_default and not is_axis:
12701271
# Fall through for conversion later on
12711272
return data, True
12721273
elif is_string_dtype(data.dtype):
12731274
# try float
12741275
try:
12751276
data = data.astype("float64")
1277+
converted = True
12761278
except (TypeError, ValueError):
12771279
pass
12781280

1279-
if data.dtype.kind == "f":
1280-
if data.dtype != "float64":
1281-
# coerce floats to 64
1282-
try:
1283-
data = data.astype("float64")
1284-
except (TypeError, ValueError):
1285-
pass
1281+
if data.dtype.kind == "f" and data.dtype != "float64":
1282+
# coerce floats to 64
1283+
try:
1284+
data = data.astype("float64")
1285+
converted = True
1286+
except (TypeError, ValueError):
1287+
pass
12861288

12871289
# don't coerce 0-len data
12881290
if len(data) and data.dtype in ("float", "object"):
@@ -1291,14 +1293,15 @@ def _try_convert_data(
12911293
new_data = data.astype("int64")
12921294
if (new_data == data).all():
12931295
data = new_data
1296+
converted = True
12941297
except (TypeError, ValueError, OverflowError):
12951298
pass
12961299

1297-
# coerce ints to 64
1298-
if data.dtype == "int":
1299-
# coerce floats to 64
1300+
if data.dtype == "int" and data.dtype != "int64":
1301+
# coerce ints to 64
13001302
try:
13011303
data = data.astype("int64")
1304+
converted = True
13021305
except (TypeError, ValueError):
13031306
pass
13041307

@@ -1307,7 +1310,7 @@ def _try_convert_data(
13071310
if self.orient == "split":
13081311
return data, False
13091312

1310-
return data, True
1313+
return data, converted
13111314

13121315
@final
13131316
def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:

pandas/tests/io/json/test_pandas.py

+14 -2
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
DataFrame,
2525
DatetimeIndex,
2626
Index,
27+
RangeIndex,
2728
Series,
2829
Timestamp,
2930
date_range,
@@ -493,12 +494,12 @@ def test_frame_mixedtype_orient(self): # GH10289
493494
left = read_json(inp, orient=orient, convert_axes=False)
494495
tm.assert_frame_equal(left, right)
495496

496-
right.index = pd.RangeIndex(len(df))
497+
right.index = RangeIndex(len(df))
497498
inp = StringIO(df.to_json(orient="records"))
498499
left = read_json(inp, orient="records", convert_axes=False)
499500
tm.assert_frame_equal(left, right)
500501

501-
right.columns = pd.RangeIndex(df.shape[1])
502+
right.columns = RangeIndex(df.shape[1])
502503
inp = StringIO(df.to_json(orient="values"))
503504
left = read_json(inp, orient="values", convert_axes=False)
504505
tm.assert_frame_equal(left, right)
@@ -2188,3 +2189,14 @@ def test_to_json_ea_null():
21882189
{"a":null,"b":null}
21892190
"""
21902191
assert result == expected
2192+
2193+
2194+
def test_read_json_lines_rangeindex():
2195+
# GH 57429
2196+
data = """
2197+
{"a": 1, "b": 2}
2198+
{"a": 3, "b": 4}
2199+
"""
2200+
result = read_json(StringIO(data), lines=True).index
2201+
expected = RangeIndex(2)
2202+
tm.assert_index_equal(result, expected, exact=True)

0 commit comments

Comments
 (0)