Skip to content

Commit bf97c6a

Browse files
committed
BUG: make JSONTableWriter fail if no index.name and 'index' in columns
This commit is intended to fix GH #58925. If index.name is empty, set_default_names is now applied inside __init__ so that the check on overlapping names fails. Previously the default name was applied only during schema creation and was not reflected on the dataframe itself, which created an inconsistency between the data and its schema. Also add a mention of the raised error to the `to_json` documentation, and move the new logic description from the IO docs to the to_json docstring.
1 parent bbe0e53 commit bf97c6a

File tree

4 files changed

+14
-2
lines changed

4 files changed

+14
-2
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,7 @@ MultiIndex
542542
I/O
543543
^^^
544544
- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
545+
- Bug in :meth:`.DataFrame.to_json` producing a corrupted record (data incompatible with its schema) when ``'index'`` was the name of a column and ``index.name`` was empty (an empty name is replaced with the generic ``'index'`` internally). It now raises a ``ValueError`` from the check that ``index.name`` is not among the columns (:issue:`58925`)
545546
- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
546547
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
547548
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)

pandas/core/generic.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2388,7 +2388,8 @@ def to_json(
23882388
index : bool or None, default None
23892389
The index is only used when 'orient' is 'split', 'index', 'column',
23902390
or 'table'. Of these, 'index' and 'column' do not support
2391-
`index=False`.
2391+
`index=False`. A column named 'index' will raise a ``ValueError`` if the
2392+
:class:`Index` is unnamed or is itself named 'index'.
23922393
23932394
indent : int, optional
23942395
Length of whitespace used to indent each record.

pandas/io/json/_json.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
from pandas.io.json._table_schema import (
6161
build_table_schema,
6262
parse_table_schema,
63+
set_default_names,
6364
)
6465
from pandas.io.parsers.readers import validate_integer
6566

@@ -352,7 +353,10 @@ def __init__(
352353
)
353354
raise ValueError(msg)
354355

356+
obj = obj.copy()
355357
self.schema = build_table_schema(obj, index=self.index)
358+
if self.index:
359+
obj = set_default_names(obj)
356360

357361
# NotImplemented on a column MultiIndex
358362
if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
@@ -369,7 +373,6 @@ def __init__(
369373
msg = "Overlapping names between the index and columns"
370374
raise ValueError(msg)
371375

372-
obj = obj.copy()
373376
timedeltas = obj.select_dtypes(include=["timedelta"]).columns
374377
if len(timedeltas):
375378
obj[timedeltas] = obj[timedeltas].map(lambda x: x.isoformat())

pandas/tests/io/json/test_pandas.py

+7
Original file line numberDiff line numberDiff line change
@@ -1610,6 +1610,13 @@ def test_to_json_from_json_columns_dtypes(self, orient):
16101610
)
16111611
tm.assert_frame_equal(result, expected)
16121612

1613+
def test_to_json_with_index_as_a_column_name(self):
1614+
df = DataFrame(data={"index": [1, 2], "a": [2, 3]})
1615+
with pytest.raises(
1616+
ValueError, match="Overlapping names between the index and columns"
1617+
):
1618+
df.to_json(orient="table")
1619+
16131620
@pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}])
16141621
def test_read_json_table_dtype_raises(self, dtype):
16151622
# GH21345

0 commit comments

Comments
 (0)