BUG: read_json returning Index instead of RangeIndex (pandas-dev#57439)

mroeschke · pmhatre1 · commit 7806c2c6ecef · 2024-05-06T23:13:08.000-07:00
* BUG: read_json returning Index instead of RangeIndex

* Keep track of conversion
diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst
@@ -18,6 +18,7 @@ Fixed regressions
 - Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`)
 - Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
 - Fixed regression in :func:`pandas.testing.assert_series_equal` defaulting to ``check_exact=True`` when checking the :class:`Index` (:issue:`57067`)
+- Fixed regression in :func:`read_json` where an :class:`Index` would be returned instead of a :class:`RangeIndex` (:issue:`57429`)
 - Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
 - Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
 - Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -1211,23 +1211,25 @@ def _try_convert_data(
             if result:
                 return new_data, True
 
+        converted = False
         if self.dtype_backend is not lib.no_default and not is_axis:
             # Fall through for conversion later on
             return data, True
         elif is_string_dtype(data.dtype):
             # try float
             try:
                 data = data.astype("float64")
+                converted = True
             except (TypeError, ValueError):
                 pass
 
-        if data.dtype.kind == "f":
-            if data.dtype != "float64":
-                # coerce floats to 64
-                try:
-                    data = data.astype("float64")
-                except (TypeError, ValueError):
-                    pass
+        if data.dtype.kind == "f" and data.dtype != "float64":
+            # coerce floats to 64
+            try:
+                data = data.astype("float64")
+                converted = True
+            except (TypeError, ValueError):
+                pass
 
         # don't coerce 0-len data
         if len(data) and data.dtype in ("float", "object"):
@@ -1236,14 +1238,15 @@ def _try_convert_data(
                 new_data = data.astype("int64")
                 if (new_data == data).all():
                     data = new_data
+                    converted = True
             except (TypeError, ValueError, OverflowError):
                 pass
 
-        # coerce ints to 64
-        if data.dtype == "int":
-            # coerce floats to 64
+        if data.dtype == "int" and data.dtype != "int64":
+            # coerce ints to 64
             try:
                 data = data.astype("int64")
+                converted = True
             except (TypeError, ValueError):
                 pass
 
@@ -1252,7 +1255,7 @@ def _try_convert_data(
             if self.orient == "split":
                 return data, False
 
-        return data, True
+        return data, converted
 
     @final
     def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -21,6 +21,7 @@
     DataFrame,
     DatetimeIndex,
     Index,
+    RangeIndex,
     Series,
     Timestamp,
     date_range,
@@ -467,12 +468,12 @@ def test_frame_mixedtype_orient(self):  # GH10289
             left = read_json(inp, orient=orient, convert_axes=False)
             tm.assert_frame_equal(left, right)
 
-        right.index = pd.RangeIndex(len(df))
+        right.index = RangeIndex(len(df))
         inp = StringIO(df.to_json(orient="records"))
         left = read_json(inp, orient="records", convert_axes=False)
         tm.assert_frame_equal(left, right)
 
-        right.columns = pd.RangeIndex(df.shape[1])
+        right.columns = RangeIndex(df.shape[1])
         inp = StringIO(df.to_json(orient="values"))
         left = read_json(inp, orient="values", convert_axes=False)
         tm.assert_frame_equal(left, right)
@@ -2139,3 +2140,14 @@ def test_to_json_ea_null():
 {"a":null,"b":null}
 """
     assert result == expected
+
+
+def test_read_json_lines_rangeindex():
+    # GH 57429
+    data = """
+{"a": 1, "b": 2}
+{"a": 3, "b": 4}
+"""
+    result = read_json(StringIO(data), lines=True).index
+    expected = RangeIndex(2)
+    tm.assert_index_equal(result, expected, exact=True)