Skip to content

Commit 2bb23e6

Browse files
committed
BUG: parse ODF time values with comments
1 parent 69b5d5a commit 2bb23e6

File tree

4 files changed

+18
-1
lines changed

4 files changed

+18
-1
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ Bug fixes
252252
- Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`)
253253
- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`)
254254
- Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`)
255+
- Bug in :meth:`pandas.read_excel` where ODS files with comments on time value cells failed to parse (related to :issue:`55200`)
255256

256257
Categorical
257258
^^^^^^^^^^^

pandas/io/excel/_odfreader.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import re
34
from typing import (
45
TYPE_CHECKING,
56
cast,
@@ -182,6 +183,20 @@ def _get_column_repeat(self, cell) -> int:
182183

183184
return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1))
184185

186+
def _parse_odf_time(self, value: str) -> pd.Timestamp:
187+
"""
188+
Helper function to convert ODF variant of ISO 8601 formatted duration
189+
"PnYnMnDTnHnMnS" - see https://www.w3.org/TR/xmlschema-2/#duration
190+
"""
191+
parts = re.match(r"^\s*PT\s*(\d+)\s*H\s*(\d+)\s*M\s*(\d+(\.\d+)?)\s*S$", value)
192+
if parts is None:
193+
raise ValueError(f"Failed to parse ODF time value: {value}")
194+
h, m, s = parts.group(1, 2, 3)
195+
# ignore date part from some representations as both pd.Timestamp
196+
# and datetime.time restrict hour values to 0..23
197+
h = str(int(h) % 24)
198+
return pd.Timestamp(f"{h}:{m}:{s}")
199+
185200
def _get_cell_value(self, cell) -> Scalar | NaTType:
186201
from odf.namespaces import OFFICENS
187202

@@ -214,7 +229,8 @@ def _get_cell_value(self, cell) -> Scalar | NaTType:
214229
cell_value = cell.attributes.get((OFFICENS, "date-value"))
215230
return pd.Timestamp(cell_value)
216231
elif cell_type == "time":
217-
stamp = pd.Timestamp(str(cell))
232+
cell_value = cell.attributes.get((OFFICENS, "time-value"))
233+
stamp = self._parse_odf_time(str(cell_value))
218234
# cast needed here because Scalar doesn't include datetime.time
219235
return cast(Scalar, stamp.time())
220236
else:
10.9 KB
Binary file not shown.
10.3 KB
Binary file not shown.

0 commit comments

Comments
 (0)