@@ -447,36 +447,42 @@ def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
447
447
sheet = self .book .sheets [sheetname ]
448
448
449
449
data = []
450
-
450
+ should_parse = {}
451
451
for i in range (sheet .nrows ()):
452
452
row = []
453
453
for j , cell in enumerate (sheet .row (i )):
454
- if isinstance (cell .value , float ):
455
- value = cell .value
456
- if convert_float :
457
- # GH5394 - Excel and ODS 'numbers' are always floats
458
- # it's a minimal perf hit and less suprising
459
- # FIXME: this goes wrong when int(cell.value) returns
460
- # a long (>1e18)
461
- val = int (cell .value )
462
- if val == cell .value :
463
- value = val
464
- elif isinstance (cell .value , compat .string_types ):
465
- typ = cell .value_type
466
- # if typ == 'string':
467
- # value = cell.value
468
- if typ == 'date' or typ == 'time' :
469
- value = self ._parse_datetime (cell )
470
- else :
454
+
455
+ if parse_cols is not None and j not in should_parse :
456
+ should_parse [j ] = self ._should_parse (j , parse_cols )
457
+
458
+ if parse_cols is None or should_parse [j ]:
459
+
460
+ if isinstance (cell .value , float ):
471
461
value = cell .value
472
- elif isinstance (cell .value , bool ):
473
- value = cell .value
474
- # elif isinstance(cell.value, type(None)):
475
- # value = np.nan
476
- else :
477
- value = None
462
+ if convert_float :
463
+ # GH5394 - Excel and ODS 'numbers' are always floats
464
+ # it's a minimal perf hit and less suprising
465
+ # FIXME: this goes wrong when int(cell.value) returns
466
+ # a long (>1e18)
467
+ val = int (cell .value )
468
+ if val == cell .value :
469
+ value = val
470
+ elif isinstance (cell .value , compat .string_types ):
471
+ typ = cell .value_type
472
+ # if typ == 'string':
473
+ # value = cell.value
474
+ if typ == 'date' or typ == 'time' :
475
+ value = self ._parse_datetime (cell )
476
+ else :
477
+ value = cell .value
478
+ elif isinstance (cell .value , bool ):
479
+ value = cell .value
480
+ # elif isinstance(cell.value, type(None)):
481
+ # value = np.nan
482
+ else :
483
+ value = np .nan
478
484
479
- row .append (value )
485
+ row .append (value )
480
486
481
487
data .append (row )
482
488
@@ -495,36 +501,56 @@ def _parse_ods(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
495
501
496
502
def _parse_datetime(self, cell):
    """Parse the date or time of an ods cell into a datetime object.

    Formats returned by ezodf are documented here:
    https://pythonhosted.org/ezodf/tableobjects.html#cell-class.

    Date string value formats: 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm:ss'

    Time string value format: 'PThhHmmMss,ffffS' (a '.' decimal separator
    is accepted as well).

    Parameters
    ----------
    cell : object
        Cell exposing ``value_type``, ``value`` and ``formula`` attributes.
        ``value`` is expected to be a string for 'date' and 'time' cells.

    Returns
    -------
    datetime.datetime, datetime.timedelta or None
        * 'date' cells give a ``datetime.datetime``.
        * 'time' cells give a ``datetime.datetime`` (dated 1900-01-01 by
          ``strptime``) when the formula or value can be read as a time of
          day, and a ``datetime.timedelta`` otherwise (for example when
          the hour count exceeds 23).
        * any other ``value_type`` gives ``None``.

    Raises
    ------
    ValueError
        If the cell value matches none of the supported formats.
    """
    strptime = datetime.datetime.strptime

    def _value2date(value):
        # Plain dates first, then the date-with-time variant.
        try:
            return strptime(value, '%Y-%m-%d')
        except ValueError:
            return strptime(value, '%Y-%m-%dT%H:%M:%S')

    def _value2timedelta(value):
        # Read 'PT<h>H<m>M<s>S' as a duration. Used when the value cannot
        # be a time of day, e.g. because the hours exceed 23.
        if not (value.startswith('PT') and value.endswith('S')):
            raise ValueError('unsupported ods time value: %r' % (value,))
        hours, found_h, rest = value[2:-1].partition('H')
        minutes, found_m, seconds = rest.partition('M')
        if not (found_h and found_m):
            raise ValueError('unsupported ods time value: %r' % (value,))
        # seconds may carry a fractional part, with ',' or '.' as separator
        return datetime.timedelta(hours=int(hours), minutes=int(minutes),
                                  seconds=float(seconds.replace(',', '.')))

    def _value2time(value):
        # strptime only understands '.' before %f, so normalise first.
        normalized = value.replace(',', '.')
        for fmt in ('PT%HH%MM%S.%fS', 'PT%HH%MM%SS'):
            try:
                return strptime(normalized, fmt)
            except ValueError:
                continue
        # Not a valid time of day (e.g. more than 23 hours): treat it as a
        # duration instead of failing.
        # TODO: now timedelta objects will be mixed with time
        return _value2timedelta(value)

    value_type = cell.value_type

    # Technically it is not necessary to derive the date/time from the
    # formula field, since the same information is available in the cell
    # value. The formula is tried first and the value is the fallback.
    if value_type == 'date':
        if cell.formula is not None:
            try:
                # formula is not by definition the date formula
                return strptime(cell.formula, 'of:=DATE(%Y;%m;%d)')
            except ValueError:
                pass
        return _value2date(cell.value)

    if value_type == 'time':
        if cell.formula is None:
            return _value2time(cell.value)
        try:
            return strptime(cell.formula, 'of:=TIME(%H;%M;%S)')
        except ValueError:
            # hours can be >23 (or the formula is something else entirely),
            # so build a timedelta from the cell value instead.
            # TODO: now timedelta objects will be mixed with time
            return _value2timedelta(cell.value)

    return None
529
555
530
556
def _print_ods_cellinfo (self , cell ):
0 commit comments