Skip to content

Commit af74083

Browse files
committed
BUG: make JSONTableWriter fail if no index.name and 'index' in columns
This commit is intended to fix GH pandas-dev#58925. If index.name is empty, set_default_names is now applied inside __init__ so that the check for overlapping names fails. Previously the default name was only applied during schema creation and was not reflected on the dataframe itself, which created an inconsistency between the data and its schema.
1 parent bbe0e53 commit af74083

File tree

3 files changed

+12
-1
lines changed

3 files changed

+12
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ I/O
548548
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
549549
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
550550
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
551+
- Bug in :meth:`.DataFrame.to_json` producing a corrupted record (data incompatible with its schema) when ``'index'`` was the name of a column and ``index.name`` was empty (an empty name is replaced with the generic ``'index'`` internally); it now raises a ``ValueError`` when ``index.name`` is in the columns (:issue:`58925`)
551552

552553
Period
553554
^^^^^^

pandas/io/json/_json.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
from pandas.io.json._table_schema import (
6161
build_table_schema,
6262
parse_table_schema,
63+
set_default_names,
6364
)
6465
from pandas.io.parsers.readers import validate_integer
6566

@@ -352,7 +353,10 @@ def __init__(
352353
)
353354
raise ValueError(msg)
354355

356+
obj = obj.copy()
355357
self.schema = build_table_schema(obj, index=self.index)
358+
if self.index:
359+
obj = set_default_names(obj)
356360

357361
# NotImplemented on a column MultiIndex
358362
if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
@@ -369,7 +373,6 @@ def __init__(
369373
msg = "Overlapping names between the index and columns"
370374
raise ValueError(msg)
371375

372-
obj = obj.copy()
373376
timedeltas = obj.select_dtypes(include=["timedelta"]).columns
374377
if len(timedeltas):
375378
obj[timedeltas] = obj[timedeltas].map(lambda x: x.isoformat())

pandas/tests/io/json/test_pandas.py

+7
Original file line numberDiff line numberDiff line change
@@ -1610,6 +1610,13 @@ def test_to_json_from_json_columns_dtypes(self, orient):
16101610
)
16111611
tm.assert_frame_equal(result, expected)
16121612

1613+
def test_to_json_with_index_as_a_column_name(self):
1614+
df = DataFrame(data={"index": [1, 2], "a": [2, 3]})
1615+
with pytest.raises(
1616+
ValueError, match="Overlapping names between the index and columns"
1617+
):
1618+
df.to_json(orient="table")
1619+
16131620
@pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}])
16141621
def test_read_json_table_dtype_raises(self, dtype):
16151622
# GH21345

0 commit comments

Comments
 (0)