|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
| 3 | +import re |
3 | 4 | from typing import (
|
4 | 5 | TYPE_CHECKING,
|
5 | 6 | cast,
|
@@ -182,6 +183,20 @@ def _get_column_repeat(self, cell) -> int:
|
182 | 183 |
|
183 | 184 | return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1))
|
184 | 185 |
|
| 186 | + def _parse_odf_time(self, value: str) -> pd.Timestamp: |
| 187 | + """ |
| 188 | + Helper function to convert ODF variant of ISO 8601 formatted duration |
| 189 | + "PnYnMnDTnHnMnS" - see https://www.w3.org/TR/xmlschema-2/#duration |
| 190 | + """ |
| 191 | + parts = re.match(r"^\s*PT\s*(\d+)\s*H\s*(\d+)\s*M\s*(\d+(\.\d+)?)\s*S$", value) |
| 192 | + if parts is None: |
| 193 | + raise ValueError(f"Failed to parse ODF time value: {value}") |
| 194 | + h, m, s = parts.group(1, 2, 3) |
| 195 | + # ignore date part from some representations as both pd.Timestamp |
| 196 | + # and datetime.time restrict hour values to 0..23 |
| 197 | + h = str(int(h) % 24) |
| 198 | + return pd.Timestamp(f"{h}:{m}:{s}") |
| 199 | + |
185 | 200 | def _get_cell_value(self, cell) -> Scalar | NaTType:
|
186 | 201 | from odf.namespaces import OFFICENS
|
187 | 202 |
|
@@ -214,7 +229,8 @@ def _get_cell_value(self, cell) -> Scalar | NaTType:
|
214 | 229 | cell_value = cell.attributes.get((OFFICENS, "date-value"))
|
215 | 230 | return pd.Timestamp(cell_value)
|
216 | 231 | elif cell_type == "time":
|
217 |
| - stamp = pd.Timestamp(str(cell)) |
| 232 | + cell_value = cell.attributes.get((OFFICENS, "time-value")) |
| 233 | + stamp = self._parse_odf_time(str(cell_value)) |
218 | 234 | # cast needed here because Scalar doesn't include datetime.time
|
219 | 235 | return cast(Scalar, stamp.time())
|
220 | 236 | else:
|
|
0 commit comments