Skip to content

BUG: to_xml with index=False and offset input index #42464

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Fixed regressions
Bug fixes
~~~~~~~~~
- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
- Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`)
-

.. ---------------------------------------------------------------------------
Expand Down
14 changes: 9 additions & 5 deletions pandas/io/formats/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,18 @@ def handle_indexes(self) -> None:
This method will add indexes into attr_cols or elem_cols.
"""

if not self.index:
return

first_key = next(iter(self.frame_dicts))
indexes: list[str] = [
x for x in self.frame_dicts[0].keys() if x not in self.orig_cols
x for x in self.frame_dicts[first_key].keys() if x not in self.orig_cols
]

if self.attr_cols and self.index:
if self.attr_cols:
self.attr_cols = indexes + self.attr_cols

if self.elem_cols and self.index:
if self.elem_cols:
self.elem_cols = indexes + self.elem_cols

def get_prefix_uri(self) -> str:
Expand Down Expand Up @@ -307,7 +311,7 @@ def build_tree(self) -> bytes:
self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")

if not self.attr_cols and not self.elem_cols:
self.elem_cols = list(self.frame_dicts[0].keys())
self.elem_cols = list(self.d.keys())
self.build_elems()

else:
Expand Down Expand Up @@ -477,7 +481,7 @@ def build_tree(self) -> bytes:
self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")

if not self.attr_cols and not self.elem_cols:
self.elem_cols = list(self.frame_dicts[0].keys())
self.elem_cols = list(self.d.keys())
self.build_elems()

else:
Expand Down
44 changes: 43 additions & 1 deletion pandas/tests/io/xml/test_to_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@

import pandas.util._test_decorators as td

from pandas import DataFrame
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm

from pandas.io.common import get_handle
Expand Down Expand Up @@ -290,6 +293,45 @@ def test_index_false_rename_row_root(datapath, parser):
assert output == expected


@pytest.mark.parametrize(
"offset_index", [list(range(10, 13)), [str(i) for i in range(10, 13)]]
)
def test_index_false_with_offset_input_index(parser, offset_index):
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure we annotate the reason for a test, even on bug fixes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can remove the docstring if it goes against coding guidelines for the project. But personally I like to give context to tests like these.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We normally don't add this, just a comment referencing the issue, but no harm since you have already written it.

Tests that the output does not contain the `<index>` field when the index of the
input DataFrame has an offset.

This is a regression test for issue #42458.
"""

expected = """\
<?xml version='1.0' encoding='utf-8'?>
<data>
<row>
<shape>square</shape>
<degrees>360</degrees>
<sides>4.0</sides>
</row>
<row>
<shape>circle</shape>
<degrees>360</degrees>
<sides/>
</row>
<row>
<shape>triangle</shape>
<degrees>180</degrees>
<sides>3.0</sides>
</row>
</data>"""

offset_geom_df = geom_df.copy()
offset_geom_df.index = Index(offset_index)
output = offset_geom_df.to_xml(index=False, parser=parser)
output = equalize_decl(output)

assert output == expected


# NA_REP

na_expected = """\
Expand Down