Skip to content

Commit 170ca23

Browse files
BUG: to_xml with index=False and offset input index
Fixes #42458. It was assumed that the index contains the element `0`. This led to a defect when the index of the input DataFrame has an offset, which is a common use case when streaming DataFrames via generators. The fix is to stop relying on accessing the `0` element of `frame_dicts` and to use its first entry instead.
1 parent f329b24 commit 170ca23

File tree

3 files changed

+50
-8
lines changed

3 files changed

+50
-8
lines changed

doc/source/whatsnew/v1.3.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Fixed regressions
2727
Bug fixes
2828
~~~~~~~~~
2929
- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
30+
- Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`)
3031
-
3132

3233
.. ---------------------------------------------------------------------------

pandas/io/formats/xml.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -195,14 +195,16 @@ def handle_indexes(self) -> None:
195195
This method will add indexes into attr_cols or elem_cols.
196196
"""
197197

198-
indexes: list[str] = [
199-
x for x in self.frame_dicts[0].keys() if x not in self.orig_cols
200-
]
198+
if not self.index:
199+
return
200+
201+
first_dict = next(iter(self.frame_dicts.values()))
202+
indexes: list[str] = [x for x in first_dict.keys() if x not in self.orig_cols]
201203

202-
if self.attr_cols and self.index:
204+
if self.attr_cols:
203205
self.attr_cols = indexes + self.attr_cols
204206

205-
if self.elem_cols and self.index:
207+
if self.elem_cols:
206208
self.elem_cols = indexes + self.elem_cols
207209

208210
def get_prefix_uri(self) -> str:
@@ -307,7 +309,7 @@ def build_tree(self) -> bytes:
307309
self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
308310

309311
if not self.attr_cols and not self.elem_cols:
310-
self.elem_cols = list(self.frame_dicts[0].keys())
312+
self.elem_cols = list(d.keys())
311313
self.build_elems()
312314

313315
else:
@@ -477,7 +479,7 @@ def build_tree(self) -> bytes:
477479
self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
478480

479481
if not self.attr_cols and not self.elem_cols:
480-
self.elem_cols = list(self.frame_dicts[0].keys())
482+
self.elem_cols = list(d.keys())
481483
self.build_elems()
482484

483485
else:

pandas/tests/io/xml/test_to_xml.py

+40-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111

1212
import pandas.util._test_decorators as td
1313

14-
from pandas import DataFrame
14+
from pandas import (
15+
DataFrame,
16+
RangeIndex,
17+
)
1518
import pandas._testing as tm
1619

1720
from pandas.io.common import get_handle
@@ -290,6 +293,42 @@ def test_index_false_rename_row_root(datapath, parser):
290293
assert output == expected
291294

292295

296+
def test_index_false_with_offset_input_index(parser):
297+
"""
298+
Tests that the output does not contain the `<index>` field when the index of the
299+
input Dataframe has an offset.
300+
301+
This is a regression test for issue #42458.
302+
"""
303+
304+
expected = """\
305+
<?xml version='1.0' encoding='utf-8'?>
306+
<data>
307+
<row>
308+
<shape>square</shape>
309+
<degrees>360</degrees>
310+
<sides>4.0</sides>
311+
</row>
312+
<row>
313+
<shape>circle</shape>
314+
<degrees>360</degrees>
315+
<sides/>
316+
</row>
317+
<row>
318+
<shape>triangle</shape>
319+
<degrees>180</degrees>
320+
<sides>3.0</sides>
321+
</row>
322+
</data>"""
323+
324+
offset_geom_df = geom_df.copy()
325+
offset_geom_df.index = RangeIndex(start=10, stop=13, step=1)
326+
output = offset_geom_df.to_xml(index=False, parser=parser)
327+
output = equalize_decl(output)
328+
329+
assert output == expected
330+
331+
293332
# NA_REP
294333

295334
na_expected = """\

0 commit comments

Comments
 (0)