API/BUG: Make to_json index= arg consistent with orient arg (pandas-dev#52143)

dshemetov · im-vinicius · commit 2b1fdeaf9fde · 2023-07-08T12:28:04.000+02:00
* API/BUG: Make to_json index= consistent with orient - split and table allow index=True/False - records and values only allow index=False - index and columns only allow index=True - raise for contradictions in the latter two - see pandas-dev#25513 * style: lint * style: make mypy happy * review: simplify * review: clarify and consolidate branches * style: add explainer comment * doc: change error message in _json * docs: update whatsnew 2.1.0 * docs: sort whatsnew
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -92,13 +92,13 @@ Other enhancements
 - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide <extending.pandas_priority>` (:issue:`48347`)
 - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)
 - Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`)
+- Improved error handling when using :meth:`DataFrame.to_json` with incompatible ``index`` and ``orient`` arguments (:issue:`52143`)
 - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
 - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`)
 - Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"``
 - :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`)
 - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`)
 - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.notable_bug_fixes:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2306,7 +2306,7 @@ def to_json(
         default_handler: Callable[[Any], JSONSerializable] | None = None,
         lines: bool_t = False,
         compression: CompressionOptions = "infer",
-        index: bool_t = True,
+        index: bool_t | None = None,
         indent: int | None = None,
         storage_options: StorageOptions = None,
         mode: Literal["a", "w"] = "w",
@@ -2375,10 +2375,11 @@ def to_json(
 
             .. versionchanged:: 1.4.0 Zstandard support.
 
-        index : bool, default True
-            Whether to include the index values in the JSON string. Not
-            including the index (``index=False``) is only supported when
-            orient is 'split' or 'table'.
+        index : bool or None, default None
+            The index is only used when 'orient' is 'split', 'index', 'column',
+            or 'table'. Of these, 'index' and 'column' do not support
+            `index=False`.
+
         indent : int, optional
            Length of whitespace used to indent each record.
 
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -100,7 +100,7 @@ def to_json(
     default_handler: Callable[[Any], JSONSerializable] | None = ...,
     lines: bool = ...,
     compression: CompressionOptions = ...,
-    index: bool = ...,
+    index: bool | None = ...,
     indent: int = ...,
     storage_options: StorageOptions = ...,
     mode: Literal["a", "w"] = ...,
@@ -120,7 +120,7 @@ def to_json(
     default_handler: Callable[[Any], JSONSerializable] | None = ...,
     lines: bool = ...,
     compression: CompressionOptions = ...,
-    index: bool = ...,
+    index: bool | None = ...,
     indent: int = ...,
     storage_options: StorageOptions = ...,
     mode: Literal["a", "w"] = ...,
@@ -139,15 +139,24 @@ def to_json(
     default_handler: Callable[[Any], JSONSerializable] | None = None,
     lines: bool = False,
     compression: CompressionOptions = "infer",
-    index: bool = True,
+    index: bool | None = None,
     indent: int = 0,
     storage_options: StorageOptions = None,
     mode: Literal["a", "w"] = "w",
 ) -> str | None:
-    if not index and orient not in ["split", "table"]:
+    if orient in ["records", "values"] and index is True:
         raise ValueError(
-            "'index=False' is only valid when 'orient' is 'split' or 'table'"
+            "'index=True' is only valid when 'orient' is 'split', 'table', "
+            "'index', or 'columns'."
         )
+    elif orient in ["index", "columns"] and index is False:
+        raise ValueError(
+            "'index=False' is only valid when 'orient' is 'split', 'table', "
+            "'records', or 'values'."
+        )
+    elif index is None:
+        # will be ignored for orient='records' and 'values'
+        index = True
 
     if lines and orient != "records":
         raise ValueError("'lines' keyword only valid when 'orient' is records")
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -1472,17 +1472,34 @@ def test_index_false_to_json_table(self, data):
 
         assert result == expected
 
-    @pytest.mark.parametrize("orient", ["records", "index", "columns", "values"])
+    @pytest.mark.parametrize("orient", ["index", "columns"])
     def test_index_false_error_to_json(self, orient):
-        # GH 17394
+        # GH 17394, 25513
         # Testing error message from to_json with index=False
 
         df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])
 
-        msg = "'index=False' is only valid when 'orient' is 'split' or 'table'"
+        msg = (
+            "'index=False' is only valid when 'orient' is 'split', "
+            "'table', 'records', or 'values'"
+        )
         with pytest.raises(ValueError, match=msg):
             df.to_json(orient=orient, index=False)
 
+    @pytest.mark.parametrize("orient", ["records", "values"])
+    def test_index_true_error_to_json(self, orient):
+        # GH 25513
+        # Testing error message from to_json with index=True
+
+        df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])
+
+        msg = (
+            "'index=True' is only valid when 'orient' is 'split', "
+            "'table', 'index', or 'columns'"
+        )
+        with pytest.raises(ValueError, match=msg):
+            df.to_json(orient=orient, index=True)
+
     @pytest.mark.parametrize("orient", ["split", "table"])
     @pytest.mark.parametrize("index", [True, False])
     def test_index_false_from_json_to_json(self, orient, index):