@@ -358,23 +358,16 @@ def read_excel(io,
358
358
** kwds )
359
359
360
360
361
- class ExcelFile (object ):
362
- """
363
- Class for parsing tabular excel sheets into DataFrame objects.
364
- Uses xlrd. See read_excel for more documentation
365
-
366
- Parameters
367
- ----------
368
- io : string, path object (pathlib.Path or py._path.local.LocalPath),
369
- file-like object or xlrd workbook
370
- If a string or path object, expected to be a path to xls or xlsx file
371
- engine : string, default None
372
- If io is not a buffer or path, this must be set to identify io.
373
- Acceptable values are None or xlrd
374
- """
361
+ class _XlrdReader (object ):
375
362
376
- def __init__ (self , io , ** kwds ):
363
+ def __init__ (self , filepath_or_buffer ):
364
+ """Reader using xlrd engine.
377
365
366
+ Parameters
367
+ ----------
368
+ filepath_or_buffer : string, path object or Workbook
369
+ Object to be parsed.
370
+ """
378
371
err_msg = "Install xlrd >= 1.0.0 for Excel support"
379
372
380
373
try :
@@ -386,46 +379,39 @@ def __init__(self, io, **kwds):
386
379
raise ImportError (err_msg +
387
380
". Current version " + xlrd .__VERSION__ )
388
381
389
- # could be a str, ExcelFile, Book, etc.
390
- self .io = io
391
- # Always a string
392
- self ._io = _stringify_path (io )
393
-
394
- engine = kwds .pop ('engine' , None )
395
-
396
- if engine is not None and engine != 'xlrd' :
397
- raise ValueError ("Unknown engine: {engine}" .format (engine = engine ))
398
-
399
- # If io is a url, want to keep the data as bytes so can't pass
400
- # to get_filepath_or_buffer()
401
- if _is_url (self ._io ):
402
- io = _urlopen (self ._io )
403
- elif not isinstance (self .io , (ExcelFile , xlrd .Book )):
404
- io , _ , _ , _ = get_filepath_or_buffer (self ._io )
405
-
406
- if engine == 'xlrd' and isinstance (io , xlrd .Book ):
407
- self .book = io
408
- elif not isinstance (io , xlrd .Book ) and hasattr (io , "read" ):
382
+ # If filepath_or_buffer is a url, want to keep the data as bytes so
383
+ # can't pass to get_filepath_or_buffer()
384
+ if _is_url (filepath_or_buffer ):
385
+ filepath_or_buffer = _urlopen (filepath_or_buffer )
386
+ elif not isinstance (filepath_or_buffer , (ExcelFile , xlrd .Book )):
387
+ filepath_or_buffer , _ , _ , _ = get_filepath_or_buffer (
388
+ filepath_or_buffer )
389
+
390
+ if isinstance (filepath_or_buffer , xlrd .Book ):
391
+ self .book = filepath_or_buffer
392
+ elif not isinstance (filepath_or_buffer , xlrd .Book ) and hasattr (
393
+ filepath_or_buffer , "read" ):
409
394
# N.B. xlrd.Book has a read attribute too
410
- if hasattr (io , 'seek' ):
395
+ if hasattr (filepath_or_buffer , 'seek' ):
411
396
try :
412
397
# GH 19779
413
- io .seek (0 )
398
+ filepath_or_buffer .seek (0 )
414
399
except UnsupportedOperation :
415
400
# HTTPResponse does not support seek()
416
401
# GH 20434
417
402
pass
418
403
419
- data = io .read ()
404
+ data = filepath_or_buffer .read ()
420
405
self .book = xlrd .open_workbook (file_contents = data )
421
- elif isinstance (self . _io , compat .string_types ):
422
- self .book = xlrd .open_workbook (self . _io )
406
+ elif isinstance (filepath_or_buffer , compat .string_types ):
407
+ self .book = xlrd .open_workbook (filepath_or_buffer )
423
408
else :
424
409
raise ValueError ('Must explicitly set engine if not passing in'
425
410
' buffer or path for io.' )
426
411
427
- def __fspath__ (self ):
428
- return self ._io
412
@property
def sheet_names(self):
    """Return the sheet names reported by ``self.book.sheet_names()``."""
    workbook = self.book
    return workbook.sheet_names()
429
415
430
416
def parse (self ,
431
417
sheet_name = 0 ,
@@ -434,12 +420,13 @@ def parse(self,
434
420
index_col = None ,
435
421
usecols = None ,
436
422
squeeze = False ,
437
- converters = None ,
423
+ dtype = None ,
438
424
true_values = None ,
439
425
false_values = None ,
440
426
skiprows = None ,
441
427
nrows = None ,
442
428
na_values = None ,
429
+ verbose = False ,
443
430
parse_dates = False ,
444
431
date_parser = None ,
445
432
thousands = None ,
@@ -448,72 +435,9 @@ def parse(self,
448
435
convert_float = True ,
449
436
mangle_dupe_cols = True ,
450
437
** kwds ):
451
- """
452
- Parse specified sheet(s) into a DataFrame
453
-
454
- Equivalent to read_excel(ExcelFile, ...) See the read_excel
455
- docstring for more info on accepted parameters
456
- """
457
-
458
- # Can't use _deprecate_kwarg since sheetname=None has a special meaning
459
- if is_integer (sheet_name ) and sheet_name == 0 and 'sheetname' in kwds :
460
- warnings .warn ("The `sheetname` keyword is deprecated, use "
461
- "`sheet_name` instead" , FutureWarning , stacklevel = 2 )
462
- sheet_name = kwds .pop ("sheetname" )
463
- elif 'sheetname' in kwds :
464
- raise TypeError ("Cannot specify both `sheet_name` "
465
- "and `sheetname`. Use just `sheet_name`" )
466
-
467
- return self ._parse_excel (sheet_name = sheet_name ,
468
- header = header ,
469
- names = names ,
470
- index_col = index_col ,
471
- usecols = usecols ,
472
- squeeze = squeeze ,
473
- converters = converters ,
474
- true_values = true_values ,
475
- false_values = false_values ,
476
- skiprows = skiprows ,
477
- nrows = nrows ,
478
- na_values = na_values ,
479
- parse_dates = parse_dates ,
480
- date_parser = date_parser ,
481
- thousands = thousands ,
482
- comment = comment ,
483
- skipfooter = skipfooter ,
484
- convert_float = convert_float ,
485
- mangle_dupe_cols = mangle_dupe_cols ,
486
- ** kwds )
487
-
488
- def _parse_excel (self ,
489
- sheet_name = 0 ,
490
- header = 0 ,
491
- names = None ,
492
- index_col = None ,
493
- usecols = None ,
494
- squeeze = False ,
495
- dtype = None ,
496
- true_values = None ,
497
- false_values = None ,
498
- skiprows = None ,
499
- nrows = None ,
500
- na_values = None ,
501
- verbose = False ,
502
- parse_dates = False ,
503
- date_parser = None ,
504
- thousands = None ,
505
- comment = None ,
506
- skipfooter = 0 ,
507
- convert_float = True ,
508
- mangle_dupe_cols = True ,
509
- ** kwds ):
510
438
511
439
_validate_header_arg (header )
512
440
513
- if 'chunksize' in kwds :
514
- raise NotImplementedError ("chunksize keyword of read_excel "
515
- "is not implemented" )
516
-
517
441
from xlrd import (xldate , XL_CELL_DATE ,
518
442
XL_CELL_ERROR , XL_CELL_BOOLEAN ,
519
443
XL_CELL_NUMBER )
@@ -563,7 +487,7 @@ def _parse_cell(cell_contents, cell_typ):
563
487
sheets = sheet_name
564
488
ret_dict = True
565
489
elif sheet_name is None :
566
- sheets = self .sheet_names
490
+ sheets = self .book . sheet_names ()
567
491
ret_dict = True
568
492
else :
569
493
sheets = [sheet_name ]
@@ -678,9 +602,111 @@ def _parse_cell(cell_contents, cell_typ):
678
602
else :
679
603
return output [asheetname ]
680
604
605
+
606
+ class ExcelFile (object ):
607
+ """
608
+ Class for parsing tabular excel sheets into DataFrame objects.
609
+ Uses xlrd. See read_excel for more documentation
610
+
611
+ Parameters
612
+ ----------
613
+ io : string, path object (pathlib.Path or py._path.local.LocalPath),
614
+ file-like object or xlrd workbook
615
+ If a string or path object, expected to be a path to xls or xlsx file.
616
+ engine : string, default None
617
+ If io is not a buffer or path, this must be set to identify io.
618
+ Acceptable values are None or ``xlrd``.
619
+ """
620
+
621
+ _engines = {
622
+ 'xlrd' : _XlrdReader ,
623
+ }
624
+
625
def __init__(self, io, engine=None):
    """
    Pick a reader class by engine name and build a reader for ``io``.

    Parameters
    ----------
    io : object
        Source to read. Stored unchanged on ``self.io``; the result of
        ``_stringify_path(io)`` is stored on ``self._io`` and is what the
        reader receives.
    engine : string, default None
        Key into ``self._engines``. ``None`` selects ``'xlrd'``.

    Raises
    ------
    ValueError
        If ``engine`` is not a key of ``self._engines``.
    """
    engine_name = 'xlrd' if engine is None else engine
    if engine_name not in self._engines:
        raise ValueError(
            "Unknown engine: {engine}".format(engine=engine_name))
    reader_cls = self._engines[engine_name]

    # The argument exactly as the caller passed it
    # (could be a str, ExcelFile, Book, etc.).
    self.io = io
    # Output of _stringify_path; this is the value handed to the reader.
    self._io = _stringify_path(io)

    self._reader = reader_cls(self._io)
637
+
638
def __fspath__(self):
    """Implement the ``os.PathLike`` hook by returning ``self._io``."""
    stored_io = self._io
    return stored_io
640
+
641
def parse(self,
          sheet_name=0,
          header=0,
          names=None,
          index_col=None,
          usecols=None,
          squeeze=False,
          converters=None,
          true_values=None,
          false_values=None,
          skiprows=None,
          nrows=None,
          na_values=None,
          parse_dates=False,
          date_parser=None,
          thousands=None,
          comment=None,
          skipfooter=0,
          convert_float=True,
          mangle_dupe_cols=True,
          **kwds):
    """
    Parse specified sheet(s) into a DataFrame

    Equivalent to read_excel(ExcelFile, ...) See the read_excel
    docstring for more info on accepted parameters. After the keyword
    checks below, every argument is forwarded to ``self._reader.parse``.

    Raises
    ------
    TypeError
        If the deprecated ``sheetname`` keyword is given together with a
        ``sheet_name`` other than the integer 0.
    NotImplementedError
        If ``chunksize`` is passed.
    """
    # Can't use _deprecate_kwarg since sheetname=None has a special meaning
    if 'sheetname' in kwds:
        sheet_name_untouched = is_integer(sheet_name) and sheet_name == 0
        if not sheet_name_untouched:
            raise TypeError("Cannot specify both `sheet_name` "
                            "and `sheetname`. Use just `sheet_name`")
        warnings.warn("The `sheetname` keyword is deprecated, use "
                      "`sheet_name` instead", FutureWarning, stacklevel=2)
        sheet_name = kwds.pop("sheetname")

    if 'chunksize' in kwds:
        raise NotImplementedError("chunksize keyword of read_excel "
                                  "is not implemented")

    # Named parameters first, then whatever extra keywords remain.
    reader_options = dict(
        sheet_name=sheet_name,
        header=header,
        names=names,
        index_col=index_col,
        usecols=usecols,
        squeeze=squeeze,
        converters=converters,
        true_values=true_values,
        false_values=false_values,
        skiprows=skiprows,
        nrows=nrows,
        na_values=na_values,
        parse_dates=parse_dates,
        date_parser=date_parser,
        thousands=thousands,
        comment=comment,
        skipfooter=skipfooter,
        convert_float=convert_float,
        mangle_dupe_cols=mangle_dupe_cols,
    )
    reader_options.update(kwds)
    return self._reader.parse(**reader_options)
702
+
703
@property
def book(self):
    """Return the ``book`` attribute of the underlying reader."""
    reader = self._reader
    return reader.book
706
+
681
707
@property
def sheet_names(self):
    """Return the ``sheet_names`` attribute of the underlying reader."""
    reader = self._reader
    return reader.sheet_names
684
710
685
711
def close (self ):
686
712
"""close io if necessary"""
0 commit comments