BUG: to_xml with index=False and offset input index

stephan-hesselmann-by · stephan-hesselmann-by · commit d90f06a719e2 · 2021-07-09T17:52:15.000+02:00
Fixes pandas-dev#42458. It was assumed that the index contains the element `0`. This led to a defect when the index of the input DataFrame has an offset, which is a common use case when streaming DataFrames via generators. The fix is to stop relying on accessing the `0` element of `frame_dicts`.
diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py
@@ -195,14 +195,18 @@ def handle_indexes(self) -> None:
         This method will add indexes into attr_cols or elem_cols.
         """
 
+        if not self.index:
+            return
+
+        first_dict = next(iter(self.frame_dicts.values()))
         indexes: list[str] = [
-            x for x in self.frame_dicts[0].keys() if x not in self.orig_cols
+            x for x in first_dict.keys() if x not in self.orig_cols
         ]
 
-        if self.attr_cols and self.index:
+        if self.attr_cols:
             self.attr_cols = indexes + self.attr_cols
 
-        if self.elem_cols and self.index:
+        if self.elem_cols:
             self.elem_cols = indexes + self.elem_cols
 
     def get_prefix_uri(self) -> str:
@@ -307,7 +311,7 @@ def build_tree(self) -> bytes:
             self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
 
             if not self.attr_cols and not self.elem_cols:
-                self.elem_cols = list(self.frame_dicts[0].keys())
+                self.elem_cols = list(d.keys())
                 self.build_elems()
 
             else:
@@ -477,7 +481,7 @@ def build_tree(self) -> bytes:
             self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
 
             if not self.attr_cols and not self.elem_cols:
-                self.elem_cols = list(self.frame_dicts[0].keys())
+                self.elem_cols = list(d.keys())
                 self.build_elems()
 
             else:
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
@@ -290,6 +290,42 @@ def test_index_false_rename_row_root(datapath, parser):
         assert output == expected
 
 
+def test_index_false_with_offset_input_index(parser):
+    """
+    Tests that the output does not contain the `<index>` field when the index of the
+    input Dataframe has an offset.
+
+    This is a regression test for issue #42458.
+    """
+
+    expected = """\
+<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <shape>square</shape>
+    <degrees>360</degrees>
+    <sides>4.0</sides>
+  </row>
+  <row>
+    <shape>circle</shape>
+    <degrees>360</degrees>
+    <sides/>
+  </row>
+  <row>
+    <shape>triangle</shape>
+    <degrees>180</degrees>
+    <sides>3.0</sides>
+  </row>
+</data>"""
+
+    offset_geom_df = geom_df.copy()
+    offset_geom_df.index = range(10, len(geom_df.index) + 10)
+    output = offset_geom_df.to_xml(index=False, parser=parser)
+    output = equalize_decl(output)
+
+    assert output == expected
+
+
 # NA_REP
 
 na_expected = """\