Skip to content

Commit be1e8f2

Browse files
committed
More correctly parse OpenDocument string cells
1 parent b955352 commit be1e8f2

File tree

1 file changed

+22
-1
lines changed

1 file changed

+22
-1
lines changed

pandas/io/excel/_odfreader.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
171171
cell_value = cell.attributes.get((OFFICENS, "value"))
172172
return float(cell_value)
173173
elif cell_type == "string":
174-
return str(cell)
174+
return self._get_cell_string_value(cell)
175175
elif cell_type == "currency":
176176
cell_value = cell.attributes.get((OFFICENS, "value"))
177177
return float(cell_value)
@@ -182,3 +182,24 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
182182
return pd.to_datetime(str(cell)).time()
183183
else:
184184
raise ValueError(f"Unrecognized type {cell_type}")
185+
186+
def _get_cell_string_value(self, cell) -> str:
    """
    Return the text content of an OpenDocument string cell.

    ``text:s`` elements are a run-length encoding of consecutive space
    characters; each one is expanded to ``text:c`` spaces (one space when
    the attribute is absent).  Text nodes are appended verbatim.

    Parameters
    ----------
    cell : odf element
        The table cell, or (when recursing) any element nested inside it
        such as ``text:p`` or ``text:span``.

    Returns
    -------
    str
        The concatenated text of all descendants, with no separator added
        between paragraphs.  An element without children yields ``""``.
    """
    from odf.element import Element, Text
    from odf.namespaces import TEXTNS
    from odf.office import Annotation
    from odf.text import S

    text_s = S().qname
    office_annotation = Annotation().qname

    pieces = []
    # Walk every child rather than indexing childNodes[0]: a cell may be
    # empty, may start with a comment, or may hold several paragraphs.
    for fragment in cell.childNodes:
        if isinstance(fragment, Text):
            pieces.append(fragment.data)
        elif isinstance(fragment, Element):
            if fragment.qname == text_s:
                count = int(fragment.attributes.get((TEXTNS, "c"), 1))
                pieces.append(" " * count)
            elif fragment.qname == office_annotation:
                # A cell comment is not part of the cell's value.
                continue
            else:
                # Recurse so that text (and text:s runs) nested inside
                # text:p, text:span, text:a, ... is not lost.
                pieces.append(self._get_cell_string_value(fragment))
    return "".join(pieces)

0 commit comments

Comments
 (0)