@@ -73,9 +73,14 @@ class BaseFile(object):
73
73
""" Class for identifying the type of reader
74
74
"""
75
75
76
- def __init__ (self , try_engine = False ):
76
+ def __init__ (self , engine , extensions , io_class , open_workbook ,
77
+ try_engine = False ):
78
+ self .engine = engine
79
+ self .extensions = extensions
80
+ self .io_class = io_class
81
+ self .open_workbook = open_workbook
77
82
if try_engine :
78
- self .has_engine ()
83
+ self .load_engine ()
79
84
80
85
def is_ext (self , path ):
81
86
"""Verify if the path's extension is supported by the reader
@@ -94,26 +99,29 @@ def is_type(self, io):
94
99
else :
95
100
return False
96
101
97
- def has_engine (self ):
98
- """Verify if the engine is installed
102
+ def load_engine (self ):
103
+ """Load the engine if installed
99
104
"""
100
105
try :
101
- self .load_engine ()
106
+ self ._load_engine ()
102
107
_readers [self .engine ] = True
103
108
except ImportError :
104
109
_readers [self .engine ] = False
110
+ except AttributeError :
111
+ _readers [self .engine ] = False
112
+ msg = 'Excel engine "%s" is not implemented' % self .engine
113
+ raise NotImplementedError (msg )
105
114
106
115
107
116
class XLRDFile (BaseFile ):
108
117
109
- def __init__ (self , ** kwargs ):
110
- self .engine = 'xlrd'
111
- self .extensions = ['xls' , 'xlsx' , 'xlsm' ]
112
- self .io_class = type (None )
113
- self .open_workbook = None
114
- super (XLRDFile , self ).__init__ (** kwargs )
118
+ def __init__ (self , try_engine = False ):
119
+ # engine, extensions, are defined here, but io_class and open_workbook
120
+ # are only defined when importing the engine
121
+ args = ('xlrd' , ['xls' , 'xlsx' , 'xlsm' ], type (None ), None )
122
+ super (XLRDFile , self ).__init__ (* args , try_engine = try_engine )
115
123
116
- def load_engine (self ):
124
+ def _load_engine (self ):
117
125
import xlrd # throw an ImportError if we need to
118
126
ver = tuple (map (int , xlrd .__VERSION__ .split ("." )[:2 ]))
119
127
if ver < (0 , 9 ): # pragma: no cover
@@ -126,14 +134,13 @@ def load_engine(self):
126
134
127
135
class EZODFFile (BaseFile ):
128
136
129
- def __init__ (self , ** kwargs ):
130
- self .engine = 'ezodf'
131
- self .extensions = ['ods' ]
132
- self .io_class = type (None )
133
- self .open_workbook = None
134
- super (EZODFFile , self ).__init__ (** kwargs )
137
+ def __init__ (self , try_engine = False ):
138
+ # engine, extensions, are defined here, but io_class and open_workbook
139
+ # are only defined when importing the engine
140
+ args = ('ezodf' , ['ods' ], type (None ), None )
141
+ super (EZODFFile , self ).__init__ (* args , try_engine = try_engine )
135
142
136
- def load_engine (self ):
143
+ def _load_engine (self ):
137
144
import ezodf
138
145
self .open_workbook = ezodf .opendoc
139
146
self .io_class = ezodf .document .PackagedDocument
@@ -150,17 +157,17 @@ def read_excel(io, sheetname=0, **kwds):
150
157
and file. For file URLs, a host is expected. For instance, a local
151
158
file could be file://localhost/path/to/workbook.xlsx
152
159
sheetname : string, int, mixed list of strings/ints, or None, default 0
153
-
154
- Strings are used for sheet names, Integers are used in zero-indexed sheet
155
- positions.
156
-
160
+
161
+ Strings are used for sheet names, Integers are used in zero-indexed sheet
162
+ positions.
163
+
157
164
Lists of strings/integers are used to request multiple sheets.
158
-
165
+
159
166
Specify None to get all sheets.
160
-
167
+
161
168
str|int -> DataFrame is returned.
162
169
list|None -> Dict of DataFrames is returned, with keys representing sheets.
163
-
170
+
164
171
Available Cases
165
172
166
173
* Defaults to 0 -> 1st sheet as a DataFrame
@@ -293,19 +300,19 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
293
300
Parameters
294
301
----------
295
302
sheetname : string, int, mixed list of strings/ints, or None, default 0
296
-
297
- Strings are used for sheet names, Integers are used in zero-indexed sheet
298
- positions.
299
-
303
+
304
+ Strings are used for sheet names, Integers are used in zero-indexed sheet
305
+ positions.
306
+
300
307
Lists of strings/integers are used to request multiple sheets.
301
-
308
+
302
309
Specify None to get all sheets.
303
-
310
+
304
311
str|int -> DataFrame is returned.
305
312
list|None -> Dict of DataFrames is returned, with keys representing sheets.
306
-
313
+
307
314
Available Cases
308
-
315
+
309
316
* Defaults to 0 -> 1st sheet as a DataFrame
310
317
* 1 -> 2nd sheet as a DataFrame
311
318
* "Sheet1" -> 1st sheet as a DataFrame
@@ -426,10 +433,10 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
426
433
427
434
epoch1904 = self .book .datemode
428
435
429
- def _parse_cell (cell_contents ,cell_typ ):
436
+ def _parse_cell (cell_contents , cell_typ ):
430
437
"""converts the contents of the cell into a pandas
431
438
appropriate object"""
432
-
439
+
433
440
if cell_typ == XL_CELL_DATE :
434
441
if xlrd_0_9_3 :
435
442
# Use the newer xlrd datetime handling.
@@ -472,10 +479,10 @@ def _parse_cell(cell_contents,cell_typ):
472
479
xlrd_0_9_3 = True
473
480
else :
474
481
xlrd_0_9_3 = False
475
-
482
+
476
483
ret_dict = False
477
-
478
- #Keep sheetname to maintain backwards compatibility.
484
+
485
+ # Keep sheetname to maintain backwards compatibility.
479
486
if isinstance (sheetname , list ):
480
487
sheets = sheetname
481
488
ret_dict = True
@@ -484,38 +491,38 @@ def _parse_cell(cell_contents,cell_typ):
484
491
ret_dict = True
485
492
else :
486
493
sheets = [sheetname ]
487
-
488
- #handle same-type duplicates.
494
+
495
+ # handle same-type duplicates.
489
496
sheets = list (set (sheets ))
490
-
497
+
491
498
output = {}
492
-
499
+
493
500
for asheetname in sheets :
494
501
if verbose :
495
502
print ("Reading sheet %s" % asheetname )
496
-
503
+
497
504
if isinstance (asheetname , compat .string_types ):
498
505
sheet = self .book .sheet_by_name (asheetname )
499
- else : # assume an integer if not a string
500
- sheet = self .book .sheet_by_index (asheetname )
501
-
506
+ else : # assume an integer if not a string
507
+ sheet = self .book .sheet_by_index (asheetname )
508
+
502
509
data = []
503
510
should_parse = {}
504
-
511
+
505
512
for i in range (sheet .nrows ):
506
513
row = []
507
514
for j , (value , typ ) in enumerate (zip (sheet .row_values (i ),
508
515
sheet .row_types (i ))):
509
516
if parse_cols is not None and j not in should_parse :
510
517
should_parse [j ] = self ._should_parse (j , parse_cols )
511
-
518
+
512
519
if parse_cols is None or should_parse [j ]:
513
- row .append (_parse_cell (value ,typ ))
520
+ row .append (_parse_cell (value , typ ))
514
521
data .append (row )
515
-
522
+
516
523
if header is not None :
517
524
data [header ] = _trim_excel_header (data [header ])
518
-
525
+
519
526
parser = TextParser (data , header = header , index_col = index_col ,
520
527
has_index_names = has_index_names ,
521
528
na_values = na_values ,
@@ -526,76 +533,103 @@ def _parse_cell(cell_contents,cell_typ):
526
533
skip_footer = skip_footer ,
527
534
chunksize = chunksize ,
528
535
** kwds )
529
-
536
+
530
537
output [asheetname ] = parser .read ()
531
-
538
+
532
539
if ret_dict :
533
540
return output
534
541
else :
535
542
return output [asheetname ]
536
-
537
543
538
544
def _parse_ods (self , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
539
545
index_col = None , has_index_names = None , parse_cols = None ,
540
546
parse_dates = False , date_parser = None , na_values = None ,
541
547
thousands = None , chunksize = None , convert_float = True ,
542
- ** kwds ):
543
-
544
- # sheetname can be index or string
545
- sheet = self .book .sheets [sheetname ]
546
-
547
- data = []
548
- should_parse = {}
549
- for i in range (sheet .nrows ()):
550
- row = []
551
- for j , cell in enumerate (sheet .row (i )):
552
-
553
- if parse_cols is not None and j not in should_parse :
554
- should_parse [j ] = self ._should_parse (j , parse_cols )
555
-
556
- if parse_cols is None or should_parse [j ]:
557
-
558
- if isinstance (cell .value , float ):
559
- value = cell .value
560
- if convert_float :
561
- # GH5394 - Excel and ODS 'numbers' are always floats
562
- # it's a minimal perf hit and less suprising
563
- # FIXME: this goes wrong when int(cell.value) returns
564
- # a long (>1e18)
565
- val = int (cell .value )
566
- if val == cell .value :
567
- value = val
568
- elif isinstance (cell .value , compat .string_types ):
569
- typ = cell .value_type
570
- # if typ == 'string':
571
- # value = cell.value
572
- if typ == 'date' or typ == 'time' :
573
- value = self ._parse_datetime (cell )
574
- else :
575
- value = cell .value
576
- elif isinstance (cell .value , bool ):
577
- value = cell .value
578
- # elif isinstance(cell.value, type(None)):
579
- # value = np.nan
580
- else :
581
- value = np .nan
548
+ verbose = False , ** kwds ):
582
549
583
- row .append (value )
550
+ def _parse_cell (cell ):
551
+ """converts the contents of the cell into a pandas
552
+ appropriate object"""
553
+ if isinstance (cell .value , float ):
554
+ value = cell .value
555
+ if convert_float :
556
+ # GH5394 - Excel and ODS 'numbers' are always floats
557
+ # it's a minimal perf hit and less suprising
558
+ # FIXME: this goes wrong when int(cell.value) returns
559
+ # a long (>1e18)
560
+ val = int (cell .value )
561
+ if val == cell .value :
562
+ value = val
563
+ elif isinstance (cell .value , compat .string_types ):
564
+ typ = cell .value_type
565
+ # if typ == 'string':
566
+ # value = cell.value
567
+ if typ == 'date' or typ == 'time' :
568
+ value = self ._parse_datetime (cell )
569
+ else :
570
+ value = cell .value
571
+ elif isinstance (cell .value , bool ):
572
+ value = cell .value
573
+ # elif isinstance(cell.value, type(None)):
574
+ # value = np.nan
575
+ else :
576
+ value = np .nan
577
+ return value
584
578
585
- data . append ( row )
579
+ ret_dict = False
586
580
587
- parser = TextParser (data , header = header , index_col = index_col ,
588
- has_index_names = has_index_names ,
589
- na_values = na_values ,
590
- thousands = thousands ,
591
- parse_dates = parse_dates ,
592
- date_parser = date_parser ,
593
- skiprows = skiprows ,
594
- skip_footer = skip_footer ,
595
- chunksize = chunksize ,
596
- ** kwds )
581
+ # Keep sheetname to maintain backwards compatibility.
582
+ if isinstance (sheetname , list ):
583
+ sheets = sheetname
584
+ ret_dict = True
585
+ elif sheetname is None :
586
+ sheets = self .sheet_names
587
+ ret_dict = True
588
+ else :
589
+ sheets = [sheetname ]
590
+
591
+ # handle same-type duplicates.
592
+ sheets = list (set (sheets ))
593
+
594
+ output = {}
595
+
596
+ for asheetname in sheets :
597
+ if verbose :
598
+ print ("Reading sheet %s" % asheetname )
599
+
600
+ # sheetname can be index or string
601
+ sheet = self .book .sheets [asheetname ]
602
+
603
+ data = []
604
+ should_parse = {}
605
+ for i in range (sheet .nrows ()):
606
+ row = []
607
+ for j , cell in enumerate (sheet .row (i )):
608
+
609
+ if parse_cols is not None and j not in should_parse :
610
+ should_parse [j ] = self ._should_parse (j , parse_cols )
611
+
612
+ if parse_cols is None or should_parse [j ]:
613
+ row .append (_parse_cell (cell ))
597
614
598
- return parser .read ()
615
+ data .append (row )
616
+
617
+ parser = TextParser (data , header = header , index_col = index_col ,
618
+ has_index_names = has_index_names ,
619
+ na_values = na_values ,
620
+ thousands = thousands ,
621
+ parse_dates = parse_dates ,
622
+ date_parser = date_parser ,
623
+ skiprows = skiprows ,
624
+ skip_footer = skip_footer ,
625
+ chunksize = chunksize ,
626
+ ** kwds )
627
+ output [asheetname ] = parser .read ()
628
+
629
+ if ret_dict :
630
+ return output
631
+ else :
632
+ return output [asheetname ]
599
633
600
634
def _parse_datetime (self , cell ):
601
635
"""Parse the date or time from on ods cell to a datetime object.
@@ -609,7 +643,7 @@ def _parse_datetime(self, cell):
609
643
def _value2date (value ):
610
644
try :
611
645
return datetime .datetime .strptime (value , '%Y-%m-%d' )
612
- except ValueError :# , TypeError):
646
+ except ValueError : # , TypeError):
613
647
return datetime .datetime .strptime (value , '%Y-%m-%dT%H:%M:%S' )
614
648
615
649
# Technically it is not necessary to try to derive the date/time
@@ -643,7 +677,7 @@ def _value2date(value):
643
677
value = _value2date (cell .value )
644
678
elif cell .value_type == 'time' :
645
679
try :
646
- # FIXME: what if the decimal separator is a comma in the locale?
680
+ # FIXME: what if the decimal separator is a comma in locale?
647
681
value = datetime .datetime .strptime (cell .value , 'PT%HH%MM%S.%fS' )
648
682
except ValueError :
649
683
value = datetime .datetime .strptime (cell .value , 'PT%HH%MM%SS' )
@@ -657,9 +691,9 @@ def _print_ods_cellinfo(self, cell):
657
691
Cell attributes are documented here:
658
692
https://pythonhosted.org/ezodf/tableobjects.html#id2
659
693
"""
660
- print (' plaintext:' , cell .plaintext ()) # no formatting
694
+ print (' plaintext:' , cell .plaintext ()) # no formatting
661
695
# formatted, but what is difference with value?
662
- print ('display_form:' , cell .display_form ) # format, ?=plaintext
696
+ print ('display_form:' , cell .display_form ) # format, ?=plaintext
663
697
print (' value:' , cell .value ) # data handled
664
698
print (' value_type:' , cell .value_type ) # data type
665
699
print (' formula:' , cell .formula )
0 commit comments