@@ -520,36 +520,42 @@ def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
520
520
sheet = self .book .sheets [sheetname ]
521
521
522
522
data = []
523
-
523
+ should_parse = {}
524
524
for i in range (sheet .nrows ()):
525
525
row = []
526
526
for j , cell in enumerate (sheet .row (i )):
527
- if isinstance (cell .value , float ):
528
- value = cell .value
529
- if convert_float :
530
- # GH5394 - Excel and ODS 'numbers' are always floats
531
- # it's a minimal perf hit and less suprising
532
- # FIXME: this goes wrong when int(cell.value) returns
533
- # a long (>1e18)
534
- val = int (cell .value )
535
- if val == cell .value :
536
- value = val
537
- elif isinstance (cell .value , compat .string_types ):
538
- typ = cell .value_type
539
- # if typ == 'string':
540
- # value = cell.value
541
- if typ == 'date' or typ == 'time' :
542
- value = self ._parse_datetime (cell )
543
- else :
527
+
528
+ if parse_cols is not None and j not in should_parse :
529
+ should_parse [j ] = self ._should_parse (j , parse_cols )
530
+
531
+ if parse_cols is None or should_parse [j ]:
532
+
533
+ if isinstance (cell .value , float ):
544
534
value = cell .value
545
- elif isinstance (cell .value , bool ):
546
- value = cell .value
547
- # elif isinstance(cell.value, type(None)):
548
- # value = np.nan
549
- else :
550
- value = None
535
+ if convert_float :
536
+ # GH5394 - Excel and ODS 'numbers' are always floats
537
+ # it's a minimal perf hit and less suprising
538
+ # FIXME: this goes wrong when int(cell.value) returns
539
+ # a long (>1e18)
540
+ val = int (cell .value )
541
+ if val == cell .value :
542
+ value = val
543
+ elif isinstance (cell .value , compat .string_types ):
544
+ typ = cell .value_type
545
+ # if typ == 'string':
546
+ # value = cell.value
547
+ if typ == 'date' or typ == 'time' :
548
+ value = self ._parse_datetime (cell )
549
+ else :
550
+ value = cell .value
551
+ elif isinstance (cell .value , bool ):
552
+ value = cell .value
553
+ # elif isinstance(cell.value, type(None)):
554
+ # value = np.nan
555
+ else :
556
+ value = np .nan
551
557
552
- row .append (value )
558
+ row .append (value )
553
559
554
560
data .append (row )
555
561
@@ -568,36 +574,56 @@ def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
568
574
569
575
def _parse_datetime(self, cell):
    """Parse the date or time of an ods cell into a datetime object.

    Formats returned by ezodf are documented here:
    https://pythonhosted.org/ezodf/tableobjects.html#cell-class.

    Date string value formats: 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm:ss'

    Time string value format: 'PThhHmmMss,ffffS' (the fractional part is
    optional; both ',' and '.' are accepted as decimal separator).

    Parameters
    ----------
    cell : object
        Cell exposing the attributes ``value``, ``value_type`` and
        ``formula`` (the only attributes read here).

    Returns
    -------
    datetime.datetime, datetime.timedelta or None
        * 'date' cells: a ``datetime.datetime``.
        * 'time' cells: a ``datetime.datetime`` (on 1900-01-01, the
          ``strptime`` default date) when the time fits in a day;
          a ``datetime.timedelta`` when the hours exceed 23 or when the
          cell has a formula that is not a plain ``TIME(h;m;s)`` call.
        * any other ``value_type``: ``None``.

    Raises
    ------
    ValueError
        If the cell value matches none of the formats listed above.
    """
    # Function-scope import so this method does not depend on the
    # module-level import block.
    import re

    def _value2date(value):
        # A date cell holds either a plain date or a date with a time part.
        try:
            return datetime.datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S')

    def _duration_parts(value):
        # Split 'PT..H..M..S' into (hours, minutes, seconds, microseconds).
        # A regex is used instead of slicing so that non zero-padded
        # fields and ',' / '.' fractional seconds are all handled.
        match = re.match(
            r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)(?:[.,](\d+))?S)?\Z', value)
        if match is None or value == 'PT':
            raise ValueError('unsupported time value: %r' % (value,))
        hours, minutes, seconds, fraction = match.groups()
        # Right-pad the fraction to microsecond precision ('5' -> 500000),
        # which mirrors how strptime's %f interprets short fractions.
        micros = int((fraction or '')[:6].ljust(6, '0'))
        return int(hours or 0), int(minutes or 0), int(seconds or 0), micros

    def _duration2timedelta(value):
        hours, minutes, seconds, micros = _duration_parts(value)
        return datetime.timedelta(hours=hours, minutes=minutes,
                                  seconds=seconds, microseconds=micros)

    # Technically it is not necessary to derive the date/time from the
    # formula field, since it can also be obtained from the cell value.
    # The formula route is kept so existing results do not change.
    if cell.formula is not None:
        if cell.value_type == 'time':
            try:
                return datetime.datetime.strptime(cell.formula,
                                                  'of:=TIME(%H;%M;%S)')
            except ValueError:
                # Either hours > 23 or the formula is not a TIME() call.
                # TODO: now timedelta objects will be mixed with time
                return _duration2timedelta(cell.value)
        if cell.value_type == 'date':
            try:
                # the formula is not by definition the DATE() formula
                return datetime.datetime.strptime(cell.formula,
                                                  'of:=DATE(%Y;%m;%d)')
            except ValueError:
                return _value2date(cell.value)
        return None

    if cell.value_type == 'date':
        return _value2date(cell.value)

    if cell.value_type == 'time':
        hours, minutes, seconds, micros = _duration_parts(cell.value)
        if hours > 23:
            # Cannot be expressed as a time of day; same fallback as the
            # formula branch above instead of an uncaught ValueError.
            return datetime.timedelta(hours=hours, minutes=minutes,
                                      seconds=seconds, microseconds=micros)
        # 1900-01-01 is the date strptime assigns when none is given.
        return datetime.datetime(1900, 1, 1, hours, minutes, seconds, micros)

    return None
602
628
603
629
def _print_ods_cellinfo (self , cell ):
0 commit comments