Skip to content

Commit 63d28a0

Browse files
davidovitch
authored and
davidovitch
committed
Refactored the ODS datetime parser and added the parse_cols column-selection filter, which had been left out when the method was copied from _parse_excel.
1 parent 87a54cb commit 63d28a0

File tree

1 file changed

+74
-48
lines changed

1 file changed

+74
-48
lines changed

pandas/io/excel.py

+74-48
Original file line number | Diff line number | Diff line change
@@ -447,36 +447,42 @@ def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
447447
sheet = self.book.sheets[sheetname]
448448

449449
data = []
450-
450+
should_parse = {}
451451
for i in range(sheet.nrows()):
452452
row = []
453453
for j, cell in enumerate(sheet.row(i)):
454-
if isinstance(cell.value, float):
455-
value = cell.value
456-
if convert_float:
457-
# GH5394 - Excel and ODS 'numbers' are always floats
458-
# it's a minimal perf hit and less suprising
459-
# FIXME: this goes wrong when int(cell.value) returns
460-
# a long (>1e18)
461-
val = int(cell.value)
462-
if val == cell.value:
463-
value = val
464-
elif isinstance(cell.value, compat.string_types):
465-
typ = cell.value_type
466-
# if typ == 'string':
467-
# value = cell.value
468-
if typ == 'date' or typ == 'time':
469-
value = self._parse_datetime(cell)
470-
else:
454+
455+
if parse_cols is not None and j not in should_parse:
456+
should_parse[j] = self._should_parse(j, parse_cols)
457+
458+
if parse_cols is None or should_parse[j]:
459+
460+
if isinstance(cell.value, float):
471461
value = cell.value
472-
elif isinstance(cell.value, bool):
473-
value = cell.value
474-
# elif isinstance(cell.value, type(None)):
475-
# value = np.nan
476-
else:
477-
value = None
462+
if convert_float:
463+
# GH5394 - Excel and ODS 'numbers' are always floats
464+
# it's a minimal perf hit and less suprising
465+
# FIXME: this goes wrong when int(cell.value) returns
466+
# a long (>1e18)
467+
val = int(cell.value)
468+
if val == cell.value:
469+
value = val
470+
elif isinstance(cell.value, compat.string_types):
471+
typ = cell.value_type
472+
# if typ == 'string':
473+
# value = cell.value
474+
if typ == 'date' or typ == 'time':
475+
value = self._parse_datetime(cell)
476+
else:
477+
value = cell.value
478+
elif isinstance(cell.value, bool):
479+
value = cell.value
480+
# elif isinstance(cell.value, type(None)):
481+
# value = np.nan
482+
else:
483+
value = np.nan
478484

479-
row.append(value)
485+
row.append(value)
480486

481487
data.append(row)
482488

@@ -495,36 +501,56 @@ def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
495501

496502
def _parse_datetime(self, cell):
497503
"""Parse the date or time from on ods cell to a datetime object.
498-
See which formats are returned:
499-
https://pythonhosted.org/ezodf/tableobjects.html#cell-class
504+
Formats returned by ezodf are documented here:
505+
https://pythonhosted.org/ezodf/tableobjects.html#cell-class.
506+
507+
Date string value formats: 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm:ss'
508+
509+
Time string value format: 'PThhHmmMss,ffffS'
500510
"""
501-
if cell.value_type == 'time' and cell.formula is not None:
511+
def _value2date(value):
502512
try:
503-
value = datetime.datetime.strptime(cell.formula,
504-
'of:=TIME(%H;%M;%S)')
505-
except ValueError:
506-
# hours can be more then 23
507-
hours = int(cell.value[2:].split('H')[0])
508-
minutes = int(cell.value[2:].split('M')[0][-2:])
509-
seconds = int(cell.value[2:].split('M')[1][:-1])
510-
if hours > 23:
513+
return datetime.datetime.strptime(value, '%Y-%m-%d')
514+
except ValueError:#, TypeError):
515+
return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S')
516+
517+
# Technically it is not necessary to try to derive the date/time
518+
# value from the formula field. The date/time can also be obtained
519+
# from the cell value field. Consequently the approach over the formula
520+
# field could be considered as too verbose and not necessary.
521+
if cell.formula is not None:
522+
if cell.value_type == 'time':
523+
try:
524+
value = datetime.datetime.strptime(cell.formula,
525+
'of:=TIME(%H;%M;%S)')
526+
except ValueError:
527+
# hours can >23, so create timedelta. Format: 'PT%HH%MM%SS'
528+
hours = int(cell.value[2:].split('H')[0])
529+
minutes = int(cell.value[2:].split('M')[0][-2:])
530+
# seconds can also contain micro seconds as a float
531+
seconds = float(cell.value[2:].split('M')[1][:-1])
532+
# TODO: now timedelta objects will be mixed with time
511533
value = datetime.timedelta(hours=hours, minutes=minutes,
512534
seconds=seconds)
513-
else:
514-
# TODO: should return a time object, not datetime?
515-
value = datetime.datetime.strptime(cell.value,
516-
'PT%HH%MM%SS')
517-
# TODO: this does not cover all scenario's
518-
# TODO: now timedelta objects will be mixed with normal time
519-
elif cell.value_type == 'date' and cell.formula is not None:
535+
elif cell.value_type == 'date':
536+
try:
537+
# formula is not by definition the date formula
538+
value = datetime.datetime.strptime(cell.formula,
539+
'of:=DATE(%Y;%m;%d)')
540+
except ValueError:
541+
value = _value2date(cell.value)
542+
else:
543+
value = None
544+
elif cell.value_type == 'date':
545+
value = _value2date(cell.value)
546+
elif cell.value_type == 'time':
520547
try:
521-
value = datetime.datetime.strptime(cell.formula,
522-
'of:=DATE(%Y;%m;%d)')
523-
except (ValueError, TypeError):
524-
# TODO: parsing other scenerio's
525-
value = cell.value
548+
value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%S.%fS')
549+
except ValueError:
550+
value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%SS')
526551
else:
527552
value = None
553+
528554
return value
529555

530556
def _print_ods_cellinfo(self, cell):

0 commit comments

Comments
 (0)