Skip to content

Commit 6e38eca

Browse files
committed
ExcelFile class, enhanced excel date handling, per GH#9
1 parent 4fbb548 commit 6e38eca

File tree

1 file changed

+54
-16
lines changed

1 file changed

+54
-16
lines changed

Diff for: pandas/io/parsers.py

+54-16
Original file line number | Diff line number | Diff line change
@@ -136,21 +136,59 @@ def parse_date(s):
136136
#===============================================================================
137137

138138

139+
class ExcelFile(object):
    """
    Reader that turns tabular .xls worksheets into DataFrame objects, built
    on top of xlrd

    Parameters
    ----------
    path : string
        Location of the xls file to open
    """

    def __init__(self, path):
        # xlrd is imported lazily so that it is only required once an
        # ExcelFile is actually constructed.
        import xlrd
        self.path = path
        self.book = xlrd.open_workbook(path)

    def old_parse(self, sheetname, header=None, index_col=0, date_col=0):
        """
        Parse one worksheet, converting a single column with ole2datetime

        Parameters
        ----------
        sheetname : string
            Name of the worksheet, looked up with ``book.sheet_by_name``
        header : passed through to simpleParser as ``header``
        index_col : passed through to simpleParser as ``indexCol``
        date_col : int or None
            Position within each row to run through ``ole2datetime``;
            None skips the conversion entirely

        Returns
        -------
        Whatever simpleParser returns for the collected rows
        """
        from pandas.core.datetools import ole2datetime

        worksheet = self.book.sheet_by_name(sheetname)

        rows = []
        for rownum in range(worksheet.nrows):
            rows.append(worksheet.row_values(rownum))

        if date_col is None:
            return simpleParser(rows, header=header, indexCol=index_col)

        for record in rows:
            try:
                record[date_col] = ole2datetime(record[date_col])
            except Exception:
                # Best effort: a value that cannot be converted is left
                # in the row untouched.
                continue
        return simpleParser(rows, header=header, indexCol=index_col)

    def parse(self, sheetname, header=None, index_col=0):
        """
        Parse one worksheet, converting every cell that xlrd types as a date

        Cells whose type equals ``XL_CELL_DATE`` are decoded with
        ``xldate_as_tuple`` using the workbook's datemode. When the decoded
        year is below ``datetime.MINYEAR`` the cell becomes a
        ``datetime.time`` built from the trailing (hour, minute, second)
        fields; otherwise it becomes a ``datetime.datetime``. All other
        cells are kept as returned by ``row_values``.

        Parameters
        ----------
        sheetname : string
            Name of the worksheet, looked up with ``book.sheet_by_name``
        header : passed through to simpleParser as ``header``
        index_col : passed through to simpleParser as ``indexCol``

        Returns
        -------
        Whatever simpleParser returns for the collected rows
        """
        from datetime import MINYEAR, time, datetime
        from xlrd import xldate_as_tuple, XL_CELL_DATE

        mode = self.book.datemode
        worksheet = self.book.sheet_by_name(sheetname)

        def convert(cell_value, cell_type):
            # Only date-typed cells are rewritten.
            if cell_type == XL_CELL_DATE:
                parts = xldate_as_tuple(cell_value, mode)
                if parts[0] < MINYEAR:
                    return time(*parts[3:])
                return datetime(*parts)
            return cell_value

        table = []
        for rownum in range(worksheet.nrows):
            pairs = zip(worksheet.row_values(rownum),
                        worksheet.row_types(rownum))
            table.append([convert(val, kind) for val, kind in pairs])
        return simpleParser(table, header=header, indexCol=index_col)
187+
188+
def parseExcel(filepath, header=None, indexCol=0, sheetname=None, **kwds):
    """
    Open an xls file and parse a single worksheet from it

    Convenience wrapper: builds an ExcelFile for ``filepath`` and returns
    the result of its ``parse`` method.

    Parameters
    ----------
    filepath : string
        Path handed to ExcelFile
    header : forwarded to ExcelFile.parse as ``header``
    indexCol : forwarded to ExcelFile.parse as ``index_col``
    sheetname : string
        Name of the worksheet to read, forwarded to ExcelFile.parse
    **kwds
        Accepted but not used by this function

    Returns
    -------
    Whatever ExcelFile.parse returns
    """
    return ExcelFile(filepath).parse(sheetname, index_col=indexCol,
                                     header=header)
194+

0 commit comments

Comments
 (0)