Skip to content

Backport PR #42464 on branch 1.3.x (BUG: to_xml with index=False and offset input index) #42513

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Fixed regressions
Bug fixes
~~~~~~~~~
- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
- Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`)
-

.. ---------------------------------------------------------------------------
Expand Down
14 changes: 9 additions & 5 deletions pandas/io/formats/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,18 @@ def handle_indexes(self) -> None:
This method will add indexes into attr_cols or elem_cols.
"""

if not self.index:
return

first_key = next(iter(self.frame_dicts))
indexes: list[str] = [
x for x in self.frame_dicts[0].keys() if x not in self.orig_cols
x for x in self.frame_dicts[first_key].keys() if x not in self.orig_cols
]

if self.attr_cols and self.index:
if self.attr_cols:
self.attr_cols = indexes + self.attr_cols

if self.elem_cols and self.index:
if self.elem_cols:
self.elem_cols = indexes + self.elem_cols

def get_prefix_uri(self) -> str:
Expand Down Expand Up @@ -307,7 +311,7 @@ def build_tree(self) -> bytes:
self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")

if not self.attr_cols and not self.elem_cols:
self.elem_cols = list(self.frame_dicts[0].keys())
self.elem_cols = list(self.d.keys())
self.build_elems()

else:
Expand Down Expand Up @@ -477,7 +481,7 @@ def build_tree(self) -> bytes:
self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")

if not self.attr_cols and not self.elem_cols:
self.elem_cols = list(self.frame_dicts[0].keys())
self.elem_cols = list(self.d.keys())
self.build_elems()

else:
Expand Down
44 changes: 43 additions & 1 deletion pandas/tests/io/xml/test_to_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
from pandas.compat import PY38
import pandas.util._test_decorators as td

from pandas import DataFrame
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm

from pandas.io.common import get_handle
Expand Down Expand Up @@ -291,6 +294,45 @@ def test_index_false_rename_row_root(datapath, parser):
assert output == expected


# Two index flavours whose labels do not start at 0: the integers 10..12 and
# the same labels as strings.
@pytest.mark.parametrize(
    "offset_index", [list(range(10, 13)), [str(i) for i in range(10, 13)]]
)
def test_index_false_with_offset_input_index(parser, offset_index):
    """
    Tests that the output does not contain the `<index>` field when the index of the
    input DataFrame has an offset.

    This is a regression test for issue #42458.

    Parameters
    ----------
    parser
        Forwarded unchanged to ``DataFrame.to_xml(parser=...)``; presumably
        supplied by a fixture defined outside this block.
    offset_index : list
        Replacement index labels, one per row of the three-row frame.
    """

    # The nesting whitespace inside this literal is significant: to_xml
    # pretty-prints by default, so each <row> is indented by two spaces and
    # each field by four. No <index> element may appear in any row.
    expected = """\
<?xml version='1.0' encoding='utf-8'?>
<data>
  <row>
    <shape>square</shape>
    <degrees>360</degrees>
    <sides>4.0</sides>
  </row>
  <row>
    <shape>circle</shape>
    <degrees>360</degrees>
    <sides/>
  </row>
  <row>
    <shape>triangle</shape>
    <degrees>180</degrees>
    <sides>3.0</sides>
  </row>
</data>"""

    # Work on a copy so the module-level frame shared with other tests keeps
    # its original index.
    offset_geom_df = geom_df.copy()
    offset_geom_df.index = Index(offset_index)

    output = offset_geom_df.to_xml(index=False, parser=parser)
    # NOTE(review): equalize_decl is defined elsewhere in this module;
    # presumably it normalises the XML declaration so that output from
    # different parsers compares equal -- confirm against its definition.
    output = equalize_decl(output)

    assert output == expected


# NA_REP

na_expected = """\
Expand Down