From 170ca23075977340830b3883b2a0e8bd3204eb4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20He=C3=9Felmann=20=28lgtf/39809=29?=
 <stephan.hesselmann@blueyonder.com>
Date: Fri, 9 Jul 2021 17:27:10 +0200
Subject: [PATCH 1/2] BUG: `to_xml` with `index=False` and offset input index

Fixes #42458

It was assumed that the index contains the element `0`. This raised a
`KeyError` when the index of the input DataFrame has an offset, which is
a common use case when streaming DataFrames via generators.

The fix stops looking up the key `0` in `frame_dicts`: `handle_indexes`
reads the first entry instead, and `build_tree` uses the keys of `d`.
---
 doc/source/whatsnew/v1.3.1.rst     |  1 +
 pandas/io/formats/xml.py           | 16 +++++++-----
 pandas/tests/io/xml/test_to_xml.py | 41 +++++++++++++++++++++++++++++-
 3 files changed, 50 insertions(+), 8 deletions(-)
diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst
index 255747c3c5c6d..c1cf9d208673f 100644
--- a/doc/source/whatsnew/v1.3.1.rst
+++ b/doc/source/whatsnew/v1.3.1.rst
@@ -27,6 +27,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
+- Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py
index d2b86cc458b74..e7ed4036fda20 100644
--- a/pandas/io/formats/xml.py
+++ b/pandas/io/formats/xml.py
@@ -195,14 +195,16 @@ def handle_indexes(self) -> None:
         This method will add indexes into attr_cols or elem_cols.
         """
 
-        indexes: list[str] = [
-            x for x in self.frame_dicts[0].keys() if x not in self.orig_cols
-        ]
+        if not self.index:
+            return
+
+        first_dict = next(iter(self.frame_dicts.values()))
+        indexes: list[str] = [x for x in first_dict.keys() if x not in self.orig_cols]
 
-        if self.attr_cols and self.index:
+        if self.attr_cols:
             self.attr_cols = indexes + self.attr_cols
 
-        if self.elem_cols and self.index:
+        if self.elem_cols:
             self.elem_cols = indexes + self.elem_cols
 
     def get_prefix_uri(self) -> str:
@@ -307,7 +309,7 @@ def build_tree(self) -> bytes:
             self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
 
             if not self.attr_cols and not self.elem_cols:
-                self.elem_cols = list(self.frame_dicts[0].keys())
+                self.elem_cols = list(d.keys())
                 self.build_elems()
 
             else:
@@ -477,7 +479,7 @@ def build_tree(self) -> bytes:
             self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
 
             if not self.attr_cols and not self.elem_cols:
-                self.elem_cols = list(self.frame_dicts[0].keys())
+                self.elem_cols = list(d.keys())
                 self.build_elems()
 
             else:
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index 478f4c803479d..c73966a8d8786 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -11,7 +11,10 @@
 
 import pandas.util._test_decorators as td
 
-from pandas import DataFrame
+from pandas import (
+    DataFrame,
+    RangeIndex,
+)
 import pandas._testing as tm
 
 from pandas.io.common import get_handle
@@ -290,6 +293,42 @@ def test_index_false_rename_row_root(datapath, parser):
         assert output == expected
 
 
+def test_index_false_with_offset_input_index(parser):
+    """
+    Tests that the output does not contain the `<index>` field when the index of the
+    input Dataframe has an offset.
+
+    This is a regression test for issue #42458.
+    """
+
+    expected = """\
+<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <shape>square</shape>
+    <degrees>360</degrees>
+    <sides>4.0</sides>
+  </row>
+  <row>
+    <shape>circle</shape>
+    <degrees>360</degrees>
+    <sides/>
+  </row>
+  <row>
+    <shape>triangle</shape>
+    <degrees>180</degrees>
+    <sides>3.0</sides>
+  </row>
+</data>"""
+
+    offset_geom_df = geom_df.copy()
+    offset_geom_df.index = RangeIndex(start=10, stop=13, step=1)
+    output = offset_geom_df.to_xml(index=False, parser=parser)
+    output = equalize_decl(output)
+
+    assert output == expected
+
+
 # NA_REP
 
 na_expected = """\

From 374c78cd50d9f2107c34955b7323839820e8df9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20He=C3=9Felmann=20=28lgtf/39809=29?=
 <stephan.hesselmann@blueyonder.com>
Date: Mon, 12 Jul 2021 17:36:58 +0200
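Subject: [PATCH 2/2] Address review comments

- `handle_indexes`: fetch the first key of `frame_dicts` and index with
  it instead of iterating over `.values()`.
- `build_tree`: read the column names from `self.d` rather than `d`.
- Test: parametrize the regression test over integer and string offset
  indexes built with `Index` instead of a `RangeIndex`.
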
---
 pandas/io/formats/xml.py           | 10 ++++++----
 pandas/tests/io/xml/test_to_xml.py |  9 ++++++---
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py
index e7ed4036fda20..f5ba8c6b53335 100644
--- a/pandas/io/formats/xml.py
+++ b/pandas/io/formats/xml.py
@@ -198,8 +198,10 @@ def handle_indexes(self) -> None:
         if not self.index:
             return
 
-        first_dict = next(iter(self.frame_dicts.values()))
-        indexes: list[str] = [x for x in first_dict.keys() if x not in self.orig_cols]
+        first_key = next(iter(self.frame_dicts))
+        indexes: list[str] = [
+            x for x in self.frame_dicts[first_key].keys() if x not in self.orig_cols
+        ]
 
         if self.attr_cols:
             self.attr_cols = indexes + self.attr_cols
@@ -309,7 +311,7 @@ def build_tree(self) -> bytes:
             self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
 
             if not self.attr_cols and not self.elem_cols:
-                self.elem_cols = list(d.keys())
+                self.elem_cols = list(self.d.keys())
                 self.build_elems()
 
             else:
@@ -479,7 +481,7 @@ def build_tree(self) -> bytes:
             self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
 
             if not self.attr_cols and not self.elem_cols:
-                self.elem_cols = list(d.keys())
+                self.elem_cols = list(self.d.keys())
                 self.build_elems()
 
             else:
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index c73966a8d8786..4f4815b9008ad 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -13,7 +13,7 @@
 
 from pandas import (
     DataFrame,
-    RangeIndex,
+    Index,
 )
 import pandas._testing as tm
 
@@ -293,7 +293,10 @@ def test_index_false_rename_row_root(datapath, parser):
         assert output == expected
 
 
-def test_index_false_with_offset_input_index(parser):
+@pytest.mark.parametrize(
+    "offset_index", [list(range(10, 13)), [str(i) for i in range(10, 13)]]
+)
+def test_index_false_with_offset_input_index(parser, offset_index):
     """
     Tests that the output does not contain the `<index>` field when the index of the
     input Dataframe has an offset.
@@ -322,7 +325,7 @@ def test_index_false_with_offset_input_index(parser):
 </data>"""
 
     offset_geom_df = geom_df.copy()
-    offset_geom_df.index = RangeIndex(start=10, stop=13, step=1)
+    offset_geom_df.index = Index(offset_index)
     output = offset_geom_df.to_xml(index=False, parser=parser)
     output = equalize_decl(output)