Skip to content

Commit 88bc2e4

Browse files
authored
BUG: read_excel for ods files raising UnboundLocalError in certain cases (#36175)
1 parent a3c4dc8 commit 88bc2e4

File tree

5 files changed

+32
-12
lines changed

5 files changed

+32
-12
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ I/O
315315
- :meth:`to_csv` did not support zip compression for binary file object not having a filename (:issue:`35058`)
316316
- :meth:`to_csv` and :meth:`read_csv` did not honor `compression` and `encoding` for path-like objects that are internally converted to file-like objects (:issue:`35677`, :issue:`26124`, and :issue:`32392`)
317317
- :meth:`to_pickle` and :meth:`read_pickle` did not support compression for file-objects (:issue:`26237`, :issue:`29054`, and :issue:`29570`)
318+
- Bug in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122` and :issue:`35802`)
318319

319320
Plotting
320321
^^^^^^^^

pandas/io/excel/_odfreader.py

+14-12
Original file line numberDiff line numberDiff line change
@@ -197,22 +197,24 @@ def _get_cell_string_value(self, cell) -> str:
197197
Find and decode OpenDocument text:s tags that represent
198198
a run length encoded sequence of space characters.
199199
"""
200-
from odf.element import Element, Text
200+
from odf.element import Element
201201
from odf.namespaces import TEXTNS
202-
from odf.text import P, S
202+
from odf.text import S
203203

204-
text_p = P().qname
205204
text_s = S().qname
206205

207-
p = cell.childNodes[0]
208-
209206
value = []
210-
if p.qname == text_p:
211-
for k, fragment in enumerate(p.childNodes):
212-
if isinstance(fragment, Text):
213-
value.append(fragment.data)
214-
elif isinstance(fragment, Element):
215-
if fragment.qname == text_s:
216-
spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))
207+
208+
for fragment in cell.childNodes:
209+
if isinstance(fragment, Element):
210+
if fragment.qname == text_s:
211+
spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))
217212
value.append(" " * spaces)
213+
else:
214+
# recursive impl needed in case of nested fragments
215+
# with multiple spaces
216+
# https://github.com/pandas-dev/pandas/pull/36175#discussion_r484639704
217+
value.append(self._get_cell_string_value(fragment))
218+
else:
219+
value.append(str(fragment))
218220
return "".join(value)
12.4 KB
Binary file not shown.
8.76 KB
Binary file not shown.

pandas/tests/io/excel/test_readers.py

+17
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,23 @@ def test_reader_spaces(self, read_ext):
499499
)
500500
tm.assert_frame_equal(actual, expected)
501501

502+
# gh-36122, gh-35802
503+
@pytest.mark.parametrize(
504+
"basename,expected",
505+
[
506+
("gh-35802", DataFrame({"COLUMN": ["Test (1)"]})),
507+
("gh-36122", DataFrame(columns=["got 2nd sa"])),
508+
],
509+
)
510+
def test_read_excel_ods_nested_xml(self, read_ext, basename, expected):
511+
# see gh-35802
512+
engine = pd.read_excel.keywords["engine"]
513+
if engine != "odf":
514+
pytest.skip(f"Skipped for engine: {engine}")
515+
516+
actual = pd.read_excel(basename + read_ext)
517+
tm.assert_frame_equal(actual, expected)
518+
502519
def test_reading_all_sheets(self, read_ext):
503520
# Test reading all sheet names by setting sheet_name to None,
504521
# Ensure a dict is returned.

0 commit comments

Comments
 (0)