From 8c8e73f63a772a1f068261b5d9dc9b7f8b8d3e5c Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 23 Mar 2023 14:28:56 -0700 Subject: [PATCH 1/9] API/BUG: Make to_json index= consistent with orient - split and table allow index=True/False - records and values only allow index=False - index and columns only allow index=True - raise for contradictions in the latter two - see #25513 --- pandas/core/generic.py | 17 ++++++++++++----- pandas/io/json/_json.py | 18 +++++++++++++++--- pandas/tests/io/json/test_pandas.py | 17 ++++++++++++++--- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d5a316c7336da..9ef4884639c47 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2268,7 +2268,7 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = None, lines: bool_t = False, compression: CompressionOptions = "infer", - index: bool_t = True, + index: bool_t | None = None, indent: int | None = None, storage_options: StorageOptions = None, mode: Literal["a", "w"] = "w", @@ -2337,10 +2337,17 @@ def to_json( .. versionchanged:: 1.4.0 Zstandard support. - index : bool, default True - Whether to include the index values in the JSON string. Not - including the index (``index=False``) is only supported when - orient is 'split' or 'table'. + index : bool or None, default None + Whether to include the index values in the JSON string. Different + defaults and options depend on the 'orient' argument: + + - 'split': default True, can also be False + - 'records': default False, cannot be True + - 'index': default True, cannot be False + - 'columns': default True, cannot be False + - 'values': default False, cannot be True + - 'table': default True, can also be False + indent : int, optional Length of whitespace used to indent each record. 
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 588ec639bc2fd..c627d4344b9aa 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -140,14 +140,26 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = None, lines: bool = False, compression: CompressionOptions = "infer", - index: bool = True, + index: bool | None = None, indent: int = 0, storage_options: StorageOptions = None, mode: Literal["a", "w"] = "w", ) -> str | None: - if not index and orient not in ["split", "table"]: + if index is None and orient in ["records", "values"]: + index = False + elif index is None: + index = True + + if not index and orient not in ["split", "table", "records", "values"]: + raise ValueError( + "'index=False' is only valid when 'orient' is 'split', 'table', " +\ + "'records', or 'values'" + ) + + if index and orient in ["records", "values"]: raise ValueError( - "'index=False' is only valid when 'orient' is 'split' or 'table'" + "'index=True' is only valid when 'orient' is 'split', 'table', " +\ + "'index', or 'columns'. Convert index to column for other orients." 
) if lines and orient != "records": diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 08308ebd2f1cf..94f6ddaacf768 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1476,17 +1476,28 @@ def test_index_false_to_json_table(self, data): assert result == expected - @pytest.mark.parametrize("orient", ["records", "index", "columns", "values"]) + @pytest.mark.parametrize("orient", ["index", "columns"]) def test_index_false_error_to_json(self, orient): - # GH 17394 + # GH 17394, 25513 # Testing error message from to_json with index=False df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) - msg = "'index=False' is only valid when 'orient' is 'split' or 'table'" + msg = "'index=False' is only valid when 'orient' is 'split', 'table', 'records', or 'values'" with pytest.raises(ValueError, match=msg): df.to_json(orient=orient, index=False) + @pytest.mark.parametrize("orient", ["records", "values"]) + def test_index_true_error_to_json(self, orient): + # GH 25513 + # Testing error message from to_json with index=True + + df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) + + msg = "'index=True' is only valid when 'orient' is 'split', 'table', 'index', or 'columns'" + with pytest.raises(ValueError, match=msg): + df.to_json(orient=orient, index=True) + @pytest.mark.parametrize("orient", ["split", "table"]) @pytest.mark.parametrize("index", [True, False]) def test_index_false_from_json_to_json(self, orient, index): From 32ba1abf368ffc7f0ef3f10096a0259547858613 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 23 Mar 2023 14:56:06 -0700 Subject: [PATCH 2/9] style: lint --- pandas/core/generic.py | 2 +- pandas/io/json/_json.py | 4 ++-- pandas/tests/io/json/test_pandas.py | 10 ++++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9ef4884639c47..89246fb6e4c94 100644 --- a/pandas/core/generic.py +++ 
b/pandas/core/generic.py @@ -2347,7 +2347,7 @@ def to_json( - 'columns': default True, cannot be False - 'values': default False, cannot be True - 'table': default True, can also be False - + indent : int, optional Length of whitespace used to indent each record. diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index c627d4344b9aa..96d3af79c2856 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -152,13 +152,13 @@ def to_json( if not index and orient not in ["split", "table", "records", "values"]: raise ValueError( - "'index=False' is only valid when 'orient' is 'split', 'table', " +\ + "'index=False' is only valid when 'orient' is 'split', 'table', " "'records', or 'values'" ) if index and orient in ["records", "values"]: raise ValueError( - "'index=True' is only valid when 'orient' is 'split', 'table', " +\ + "'index=True' is only valid when 'orient' is 'split', 'table', " "'index', or 'columns'. Convert index to column for other orients." ) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 94f6ddaacf768..ea9491c7a45e0 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1483,7 +1483,10 @@ def test_index_false_error_to_json(self, orient): df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) - msg = "'index=False' is only valid when 'orient' is 'split', 'table', 'records', or 'values'" + msg = ( + "'index=False' is only valid when 'orient' is 'split', " + "'table', 'records', or 'values'" + ) with pytest.raises(ValueError, match=msg): df.to_json(orient=orient, index=False) @@ -1494,7 +1497,10 @@ def test_index_true_error_to_json(self, orient): df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) - msg = "'index=True' is only valid when 'orient' is 'split', 'table', 'index', or 'columns'" + msg = ( + "'index=True' is only valid when 'orient' is 'split', " + "'table', 'index', or 'columns'" + ) with pytest.raises(ValueError, match=msg): 
df.to_json(orient=orient, index=True) From 64e14c39e1332880ebdc0a15dbdd68a05c935f01 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 27 Mar 2023 13:17:08 -0700 Subject: [PATCH 3/9] style: make mypy happy --- pandas/io/json/_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 96d3af79c2856..7a56418e69163 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -101,7 +101,7 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = ..., lines: bool = ..., compression: CompressionOptions = ..., - index: bool = ..., + index: bool | None = ..., indent: int = ..., storage_options: StorageOptions = ..., mode: Literal["a", "w"] = ..., @@ -121,7 +121,7 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = ..., lines: bool = ..., compression: CompressionOptions = ..., - index: bool = ..., + index: bool | None = ..., indent: int = ..., storage_options: StorageOptions = ..., mode: Literal["a", "w"] = ..., From 636b41771d607a203908e1b5c63bd5edb7a471b0 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Tue, 28 Mar 2023 16:41:18 -0700 Subject: [PATCH 4/9] review: simplify --- pandas/core/generic.py | 12 +++--------- pandas/io/json/_json.py | 7 +++---- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 89246fb6e4c94..7576e0a5f7876 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2338,15 +2338,9 @@ def to_json( .. versionchanged:: 1.4.0 Zstandard support. index : bool or None, default None - Whether to include the index values in the JSON string. 
Different - defaults and options depend on the 'orient' argument: - - - 'split': default True, can also be False - - 'records': default False, cannot be True - - 'index': default True, cannot be False - - 'columns': default True, cannot be False - - 'values': default False, cannot be True - - 'table': default True, can also be False + The index is only used when 'orient' is 'split', 'index', 'columns', + or 'table'. Of these, 'index' and 'columns' do not support + `index=False`. indent : int, optional Length of whitespace used to indent each record. diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 7a56418e69163..5abd032efd4ab 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -150,13 +150,12 @@ def to_json( elif index is None: index = True - if not index and orient not in ["split", "table", "records", "values"]: + if not index and orient in ["index", "columns"]: raise ValueError( "'index=False' is only valid when 'orient' is 'split', 'table', " - "'records', or 'values'" + "'records', or 'values'." ) - - if index and orient in ["records", "values"]: + elif index and orient in ["records", "values"]: raise ValueError( "'index=True' is only valid when 'orient' is 'split', 'table', " "'index', or 'columns'. Convert index to column for other orients." 
From 89cf3c505fbe288b3b125167acba5d462ec1fe47 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 6 Apr 2023 15:12:17 -0700 Subject: [PATCH 5/9] review: clarify and consolidate branches --- pandas/io/json/_json.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 050036a7d4714..475b8f5ebc720 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -146,21 +146,18 @@ def to_json( storage_options: StorageOptions = None, mode: Literal["a", "w"] = "w", ) -> str | None: - if index is None and orient in ["records", "values"]: - index = False - elif index is None: - index = True - - if not index and orient in ["index", "columns"]: - raise ValueError( - "'index=False' is only valid when 'orient' is 'split', 'table', " - "'records', or 'values'." - ) - elif index and orient in ["records", "values"]: + if orient in ["records", "values"] and index is True: raise ValueError( "'index=True' is only valid when 'orient' is 'split', 'table', " "'index', or 'columns'. Convert index to column for other orients." ) + elif orient in ["index", "columns"] and index is False: + raise ValueError( + "'index=False' is only valid when 'orient' is 'split', 'table', " + "'records', or 'values'. Convert index to column for other orients." + ) + elif index is None: + index = True if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") From 38b81b2971d68b6aae18b6fe9776a658b4ad8502 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 6 Apr 2023 15:15:10 -0700 Subject: [PATCH 6/9] style: add explainer comment --- pandas/io/json/_json.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 475b8f5ebc720..75f1070c9ccf8 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -157,6 +157,7 @@ def to_json( "'records', or 'values'. Convert index to column for other orients." 
) elif index is None: + # will be ignored for orient='records' and 'values' index = True if lines and orient != "records": From 8aef2dd2120284ec89b35947b7692133a8bfc823 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Tue, 9 May 2023 14:45:03 -0700 Subject: [PATCH 7/9] doc: change error message in _json --- pandas/io/json/_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index b1a0598c7c014..5c2fba814375f 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -147,12 +147,12 @@ def to_json( if orient in ["records", "values"] and index is True: raise ValueError( "'index=True' is only valid when 'orient' is 'split', 'table', " - "'index', or 'columns'. Convert index to column for other orients." + "'index', or 'columns'." ) elif orient in ["index", "columns"] and index is False: raise ValueError( "'index=False' is only valid when 'orient' is 'split', 'table', " - "'records', or 'values'. Convert index to column for other orients." + "'records', or 'values'." 
) elif index is None: # will be ignored for orient='records' and 'values' From 3d84579c7b1b072b0f5073e23bc770ccf9638a85 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Tue, 9 May 2023 15:20:20 -0700 Subject: [PATCH 8/9] docs: update whatsnew 2.1.0 --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 36b2aa3c28da5..92bebdcb22557 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -97,7 +97,7 @@ Other enhancements - Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"`` - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) -- +- Improved error messaging when using :meth:`DataFrame.to_json` with incompatible ``index`` and ``orient`` arguments (:issue:`52143`) .. --------------------------------------------------------------------------- .. 
_whatsnew_210.notable_bug_fixes: From d53b9f49852d880547e04a6be33e9e233c3b3742 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 May 2023 15:05:11 -0700 Subject: [PATCH 9/9] docs: sort whatsnew --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 92bebdcb22557..707baf959654a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -92,12 +92,12 @@ Other enhancements - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide ` (:issue:`48347`) - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`) - Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`) +- Improved error handling when using :meth:`DataFrame.to_json` with incompatible ``index`` and ``orient`` arguments (:issue:`52143`) - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`) - Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"`` - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) -- Improved error messaging when using :meth:`DataFrame.to_json` with incompatible ``index`` and ``orient`` arguments (:issue:`52143`) .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: