Skip to content

Commit fef8097

Browse files
davidovitchdavidovitch
authored and
davidovitch
committed
Refactored the ODS datetime parser and added the parse_cols column-selection filter, which had been left out when the method was copied from _parse_excel
1 parent c04def9 commit fef8097

File tree

1 file changed

+74
-48
lines changed

1 file changed

+74
-48
lines changed

pandas/io/excel.py

+74-48
Original file line numberDiff line numberDiff line change
@@ -520,36 +520,42 @@ def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
520520
sheet = self.book.sheets[sheetname]
521521

522522
data = []
523-
523+
should_parse = {}
524524
for i in range(sheet.nrows()):
525525
row = []
526526
for j, cell in enumerate(sheet.row(i)):
527-
if isinstance(cell.value, float):
528-
value = cell.value
529-
if convert_float:
530-
# GH5394 - Excel and ODS 'numbers' are always floats
531-
# it's a minimal perf hit and less suprising
532-
# FIXME: this goes wrong when int(cell.value) returns
533-
# a long (>1e18)
534-
val = int(cell.value)
535-
if val == cell.value:
536-
value = val
537-
elif isinstance(cell.value, compat.string_types):
538-
typ = cell.value_type
539-
# if typ == 'string':
540-
# value = cell.value
541-
if typ == 'date' or typ == 'time':
542-
value = self._parse_datetime(cell)
543-
else:
527+
528+
if parse_cols is not None and j not in should_parse:
529+
should_parse[j] = self._should_parse(j, parse_cols)
530+
531+
if parse_cols is None or should_parse[j]:
532+
533+
if isinstance(cell.value, float):
544534
value = cell.value
545-
elif isinstance(cell.value, bool):
546-
value = cell.value
547-
# elif isinstance(cell.value, type(None)):
548-
# value = np.nan
549-
else:
550-
value = None
535+
if convert_float:
536+
# GH5394 - Excel and ODS 'numbers' are always floats
537+
# it's a minimal perf hit and less surprising
538+
# FIXME: this goes wrong when int(cell.value) returns
539+
# a long (>1e18)
540+
val = int(cell.value)
541+
if val == cell.value:
542+
value = val
543+
elif isinstance(cell.value, compat.string_types):
544+
typ = cell.value_type
545+
# if typ == 'string':
546+
# value = cell.value
547+
if typ == 'date' or typ == 'time':
548+
value = self._parse_datetime(cell)
549+
else:
550+
value = cell.value
551+
elif isinstance(cell.value, bool):
552+
value = cell.value
553+
# elif isinstance(cell.value, type(None)):
554+
# value = np.nan
555+
else:
556+
value = np.nan
551557

552-
row.append(value)
558+
row.append(value)
553559

554560
data.append(row)
555561

@@ -568,36 +574,56 @@ def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
568574

569575
def _parse_datetime(self, cell):
570576
"""Parse the date or time from an ods cell to a datetime object.
571-
See which formats are returned:
572-
https://pythonhosted.org/ezodf/tableobjects.html#cell-class
577+
Formats returned by ezodf are documented here:
578+
https://pythonhosted.org/ezodf/tableobjects.html#cell-class.
579+
580+
Date string value formats: 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm:ss'
581+
582+
Time string value format: 'PThhHmmMss,ffffS'
573583
"""
574-
if cell.value_type == 'time' and cell.formula is not None:
584+
def _value2date(value):
575585
try:
576-
value = datetime.datetime.strptime(cell.formula,
577-
'of:=TIME(%H;%M;%S)')
578-
except ValueError:
579-
# hours can be more then 23
580-
hours = int(cell.value[2:].split('H')[0])
581-
minutes = int(cell.value[2:].split('M')[0][-2:])
582-
seconds = int(cell.value[2:].split('M')[1][:-1])
583-
if hours > 23:
586+
return datetime.datetime.strptime(value, '%Y-%m-%d')
587+
except ValueError:#, TypeError):
588+
return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S')
589+
590+
# Technically it is not necessary to try to derive the date/time
591+
# value from the formula field. The date/time can also be obtained
592+
# from the cell value field. Consequently the approach over the formula
593+
# field could be considered as too verbose and not necessary.
594+
if cell.formula is not None:
595+
if cell.value_type == 'time':
596+
try:
597+
value = datetime.datetime.strptime(cell.formula,
598+
'of:=TIME(%H;%M;%S)')
599+
except ValueError:
600+
# hours can be >23, so create a timedelta. Format: 'PT%HH%MM%SS'
601+
hours = int(cell.value[2:].split('H')[0])
602+
minutes = int(cell.value[2:].split('M')[0][-2:])
603+
# seconds can also contain microseconds, so parse them as a float
604+
seconds = float(cell.value[2:].split('M')[1][:-1])
605+
# TODO: now timedelta objects will be mixed with time
584606
value = datetime.timedelta(hours=hours, minutes=minutes,
585607
seconds=seconds)
586-
else:
587-
# TODO: should return a time object, not datetime?
588-
value = datetime.datetime.strptime(cell.value,
589-
'PT%HH%MM%SS')
590-
# TODO: this does not cover all scenario's
591-
# TODO: now timedelta objects will be mixed with normal time
592-
elif cell.value_type == 'date' and cell.formula is not None:
608+
elif cell.value_type == 'date':
609+
try:
610+
# formula is not by definition the date formula
611+
value = datetime.datetime.strptime(cell.formula,
612+
'of:=DATE(%Y;%m;%d)')
613+
except ValueError:
614+
value = _value2date(cell.value)
615+
else:
616+
value = None
617+
elif cell.value_type == 'date':
618+
value = _value2date(cell.value)
619+
elif cell.value_type == 'time':
593620
try:
594-
value = datetime.datetime.strptime(cell.formula,
595-
'of:=DATE(%Y;%m;%d)')
596-
except (ValueError, TypeError):
597-
# TODO: parsing other scenerio's
598-
value = cell.value
621+
value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%S.%fS')
622+
except ValueError:
623+
value = datetime.datetime.strptime(cell.value, 'PT%HH%MM%SS')
599624
else:
600625
value = None
626+
601627
return value
602628

603629
def _print_ods_cellinfo(self, cell):

0 commit comments

Comments
 (0)