Skip to content

Commit 7361ccb

Browse files
Backport PR #36175: BUG: read_excel for ods files raising UnboundLocalError in certain cases (#36355)
Co-authored-by: Asish Mahapatra <[email protected]>
1 parent 2dd1f08 commit 7361ccb

File tree

5 files changed

+32
-12
lines changed

5 files changed

+32
-12
lines changed

doc/source/whatsnew/v1.1.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`)
1919
- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`)
2020
- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`)
21+
- Fixed regression in :meth:`read_excel` with ``engine="odf"`` that caused an ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`)
2122
-
2223

2324
.. ---------------------------------------------------------------------------

pandas/io/excel/_odfreader.py

+14-12
Original file line numberDiff line numberDiff line change
@@ -191,22 +191,24 @@ def _get_cell_string_value(self, cell) -> str:
191191
Find and decode OpenDocument text:s tags that represent
192192
a run length encoded sequence of space characters.
193193
"""
194-
from odf.element import Element, Text
194+
from odf.element import Element
195195
from odf.namespaces import TEXTNS
196-
from odf.text import P, S
196+
from odf.text import S
197197

198-
text_p = P().qname
199198
text_s = S().qname
200199

201-
p = cell.childNodes[0]
202-
203200
value = []
204-
if p.qname == text_p:
205-
for k, fragment in enumerate(p.childNodes):
206-
if isinstance(fragment, Text):
207-
value.append(fragment.data)
208-
elif isinstance(fragment, Element):
209-
if fragment.qname == text_s:
210-
spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))
201+
202+
for fragment in cell.childNodes:
203+
if isinstance(fragment, Element):
204+
if fragment.qname == text_s:
205+
spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))
211206
value.append(" " * spaces)
207+
else:
208+
# recursive impl needed in case of nested fragments
209+
# with multiple spaces
210+
# https://github.com/pandas-dev/pandas/pull/36175#discussion_r484639704
211+
value.append(self._get_cell_string_value(fragment))
212+
else:
213+
value.append(str(fragment))
212214
return "".join(value)
12.4 KB
Binary file not shown.
8.76 KB
Binary file not shown.

pandas/tests/io/excel/test_readers.py

+17
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,23 @@ def test_reader_spaces(self, read_ext):
519519
)
520520
tm.assert_frame_equal(actual, expected)
521521

522+
# gh-36122, gh-35802
523+
@pytest.mark.parametrize(
524+
"basename,expected",
525+
[
526+
("gh-35802", DataFrame({"COLUMN": ["Test (1)"]})),
527+
("gh-36122", DataFrame(columns=["got 2nd sa"])),
528+
],
529+
)
530+
def test_read_excel_ods_nested_xml(self, read_ext, basename, expected):
531+
# see gh-35802
532+
engine = pd.read_excel.keywords["engine"]
533+
if engine != "odf":
534+
pytest.skip(f"Skipped for engine: {engine}")
535+
536+
actual = pd.read_excel(basename + read_ext)
537+
tm.assert_frame_equal(actual, expected)
538+
522539
def test_reading_all_sheets(self, read_ext):
523540
# Test reading all sheet names by setting sheet_name to None,
524541
# Ensure a dict is returned.

0 commit comments

Comments
 (0)