@@ -1803,64 +1803,32 @@ def _make_reader(self, f):
1803
1803
#----------------------------------------------------------------------
1804
1804
# ExcelFile class
1805
1805
1806
- _openpyxl_msg = ("\n For parsing .xlsx files 'openpyxl' is required.\n "
1807
- "You can install it via 'easy_install openpyxl' or "
1808
- "'pip install openpyxl'.\n Alternatively, you could save"
1809
- " the .xlsx file as a .xls file.\n " )
1810
-
1811
-
1812
1806
class ExcelFile (object ):
1813
1807
"""
1814
1808
Class for parsing tabular excel sheets into DataFrame objects.
1815
- Uses xlrd for parsing .xls files or openpyxl for .xlsx files.
1816
- See ExcelFile.parse for more documentation
1809
+ Uses xlrd. See ExcelFile.parse for more documentation
1817
1810
1818
1811
Parameters
1819
1812
----------
1820
1813
path : string or file-like object
1821
1814
Path to xls or xlsx file
1822
- kind : {'xls', 'xlsx', None}, default None
1823
1815
"""
1824
def __init__(self, path_or_buf, kind=None, **kwds):
    """
    Open an Excel workbook with xlrd and keep it on ``self.book``.

    Parameters
    ----------
    path_or_buf : string or file-like object
        A string is handed to ``xlrd.open_workbook`` as-is; anything
        else must provide ``read()``, whose result is passed as the
        workbook contents.
    kind : optional, default None
        Stored on ``self.kind``; not otherwise consulted by this
        constructor.
    **kwds
        Accepted but not used by this constructor.

    Raises
    ------
    ImportError
        If xlrd cannot be imported, or if its reported version is
        older than 0.9.
    """
    self.kind = kind

    # Imported here rather than at module level so that a missing xlrd
    # raises ImportError only when Excel support is actually requested.
    import xlrd
    # Compare only (major, minor); any further version components are
    # ignored by the [:2] slice.
    ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2]))
    if ver < (0, 9):
        raise ImportError("pandas requires xlrd >= 0.9.0 for excel support")

    self.path_or_buf = path_or_buf
    self.tmpfile = None

    if isinstance(path_or_buf, basestring):
        # A string: let xlrd open it directly.
        self.book = xlrd.open_workbook(path_or_buf)
    else:
        # Otherwise read everything up front and hand xlrd the contents.
        data = path_or_buf.read()
        self.book = xlrd.open_workbook(file_contents=data)
1864
1832
1865
1833
def __repr__(self):
    """Return the default ``object`` representation of this instance."""
    return object.__repr__(self)
@@ -1908,9 +1876,7 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0,
1908
1876
if skipfooter is not None :
1909
1877
skip_footer = skipfooter
1910
1878
1911
- choose = {True : self ._parse_xlsx ,
1912
- False : self ._parse_xls }
1913
- return choose [self .use_xlsx ](sheetname , header = header ,
1879
+ return self ._parse_excel (sheetname , header = header ,
1914
1880
skiprows = skiprows , index_col = index_col ,
1915
1881
has_index_names = has_index_names ,
1916
1882
parse_cols = parse_cols ,
@@ -1953,47 +1919,12 @@ def _excel2num(x):
1953
1919
else :
1954
1920
return i in parse_cols
1955
1921
1956
- def _parse_xlsx (self , sheetname , header = 0 , skiprows = None ,
1957
- skip_footer = 0 , index_col = None , has_index_names = False ,
1958
- parse_cols = None , parse_dates = False , date_parser = None ,
1959
- na_values = None , thousands = None , chunksize = None ):
1960
- sheet = self .book .get_sheet_by_name (name = sheetname )
1961
- data = []
1962
-
1963
- # it brings a new method: iter_rows()
1964
- should_parse = {}
1965
-
1966
- for row in sheet .iter_rows ():
1967
- row_data = []
1968
- for j , cell in enumerate (row ):
1969
-
1970
- if parse_cols is not None and j not in should_parse :
1971
- should_parse [j ] = self ._should_parse (j , parse_cols )
1972
-
1973
- if parse_cols is None or should_parse [j ]:
1974
- row_data .append (cell .internal_value )
1975
- data .append (row_data )
1976
-
1977
- if header is not None :
1978
- data [header ] = _trim_excel_header (data [header ])
1979
-
1980
- parser = TextParser (data , header = header , index_col = index_col ,
1981
- has_index_names = has_index_names ,
1982
- na_values = na_values ,
1983
- thousands = thousands ,
1984
- parse_dates = parse_dates ,
1985
- date_parser = date_parser ,
1986
- skiprows = skiprows ,
1987
- skip_footer = skip_footer ,
1988
- chunksize = chunksize )
1989
-
1990
- return parser .read ()
1991
-
1992
- def _parse_xls (self , sheetname , header = 0 , skiprows = None ,
1922
+ def _parse_excel (self , sheetname , header = 0 , skiprows = None ,
1993
1923
skip_footer = 0 , index_col = None , has_index_names = None ,
1994
1924
parse_cols = None , parse_dates = False , date_parser = None ,
1995
1925
na_values = None , thousands = None , chunksize = None ):
1996
- from xlrd import xldate_as_tuple , XL_CELL_DATE , XL_CELL_ERROR
1926
+ from xlrd import (xldate_as_tuple , XL_CELL_DATE ,
1927
+ XL_CELL_ERROR , XL_CELL_BOOLEAN )
1997
1928
1998
1929
datemode = self .book .datemode
1999
1930
sheet = self .book .sheet_by_name (sheetname )
@@ -2015,9 +1946,12 @@ def _parse_xls(self, sheetname, header=0, skiprows=None,
2015
1946
value = datetime .time (* dt [3 :])
2016
1947
else :
2017
1948
value = datetime .datetime (* dt )
2018
- if typ == XL_CELL_ERROR :
1949
+ elif typ == XL_CELL_ERROR :
2019
1950
value = np .nan
1951
+ elif typ == XL_CELL_BOOLEAN :
1952
+ value = bool (value )
2020
1953
row .append (value )
1954
+
2021
1955
data .append (row )
2022
1956
2023
1957
if header is not None :
@@ -2037,9 +1971,6 @@ def _parse_xls(self, sheetname, header=0, skiprows=None,
2037
1971
2038
1972
@property
def sheet_names(self):
    """Return whatever ``self.book.sheet_names()`` returns."""
    return self.book.sheet_names()
2044
1975
2045
1976
0 commit comments