Skip to content

Commit 9fcd30b

Browse files
committed
ENH Factored out excel_value_to_python_value from ExcelFile::_parse_excel (GH4589)
1 parent aeabda1 commit 9fcd30b

File tree

4 files changed

+48
-15
lines changed

4 files changed

+48
-15
lines changed

doc/source/release.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ pandas 0.13
4444
- Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf",
4545
"iNf", etc.) as infinity (:issue:`4220`, :issue:`4219`), affecting
4646
``read_table``, ``read_csv``, etc.
47-
47+
- Factored out ``excel_value_to_python_value`` from ``ExcelFile._parse_excel`` (:issue:`4589`)
48+
4849
**API Changes**
4950

5051
- ``pandas`` now is Python 2/3 compatible without the need for 2to3 thanks to

pandas/io/excel.py

+18-13
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,23 @@ def read_excel(path_or_buf, sheetname, kind=None, **kwds):
5353
return ExcelFile(path_or_buf, kind=kind).parse(sheetname=sheetname,
5454
kind=kind, **kwds)
5555

56+
def excel_value_to_python_value(value, typ, datemode):
    """
    Convert a raw xlrd cell value into the matching Python object.

    Parameters
    ----------
    value : object
        Cell value as read from the sheet.
    typ : object
        Cell type code; compared against xlrd's ``XL_CELL_DATE``,
        ``XL_CELL_ERROR`` and ``XL_CELL_BOOLEAN`` constants.
    datemode : object
        Workbook date mode, forwarded unchanged to ``xldate_as_tuple``.

    Returns
    -------
    object
        * date cell: ``datetime.datetime`` built from the decoded tuple, or
          ``datetime.time`` when the decoded year is below
          ``datetime.MINYEAR``
        * error cell: ``np.nan``
        * boolean cell: ``bool(value)``
        * any other cell type: ``value`` unchanged
    """
    # Imported at call time rather than at module import time.
    from xlrd import (xldate_as_tuple, XL_CELL_DATE,
                      XL_CELL_ERROR, XL_CELL_BOOLEAN)

    if typ == XL_CELL_DATE:
        date_parts = xldate_as_tuple(value, datemode)
        # how to produce this first case?
        if date_parts[0] < datetime.MINYEAR:  # pragma: no cover
            return datetime.time(*date_parts[3:])
        return datetime.datetime(*date_parts)

    if typ == XL_CELL_ERROR:
        return np.nan

    if typ == XL_CELL_BOOLEAN:
        return bool(value)

    # Every other cell type is passed through untouched.
    return value
5673

5774
class ExcelFile(object):
5875
"""
@@ -174,8 +191,6 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0,
174191
index_col=None, has_index_names=None, parse_cols=None,
175192
parse_dates=False, date_parser=None, na_values=None,
176193
thousands=None, chunksize=None, **kwds):
177-
from xlrd import (xldate_as_tuple, XL_CELL_DATE,
178-
XL_CELL_ERROR, XL_CELL_BOOLEAN)
179194

180195
datemode = self.book.datemode
181196
if isinstance(sheetname, compat.string_types):
@@ -193,17 +208,7 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0,
193208
should_parse[j] = self._should_parse(j, parse_cols)
194209

195210
if parse_cols is None or should_parse[j]:
196-
if typ == XL_CELL_DATE:
197-
dt = xldate_as_tuple(value, datemode)
198-
# how to produce this first case?
199-
if dt[0] < datetime.MINYEAR: # pragma: no cover
200-
value = datetime.time(*dt[3:])
201-
else:
202-
value = datetime.datetime(*dt)
203-
elif typ == XL_CELL_ERROR:
204-
value = np.nan
205-
elif typ == XL_CELL_BOOLEAN:
206-
value = bool(value)
211+
value = excel_value_to_python_value(value=value, typ=typ, datemode=datemode)
207212
row.append(value)
208213

209214
data.append(row)

pandas/io/tests/data/types.xls

16 KB
Binary file not shown.

pandas/io/tests/test_excel.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
import pandas.io.parsers as parsers
1919
from pandas.io.parsers import (read_csv, read_table, read_fwf,
2020
TextParser, TextFileReader)
21-
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel
21+
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel,\
22+
excel_value_to_python_value
2223
from pandas.util.testing import (assert_almost_equal,
2324
assert_series_equal,
2425
network,
@@ -260,6 +261,32 @@ def test_excel_table(self):
260261
tm.assert_frame_equal(df4, df.ix[:-1])
261262
tm.assert_frame_equal(df4, df5)
262263

264+
def test_excel_value_to_python_value(self):
    """Check ``excel_value_to_python_value`` on the cells of ``types.xls``."""
    _skip_if_no_xlrd()

    pth = os.path.join(self.dirpath, 'types.xls')
    xls = ExcelFile(pth)
    book = xls.book
    sheet = book.sheet_by_index(0)

    def convert(rowx, colx):
        # Run the cell at (rowx, colx) through the converter under test.
        cell = sheet.cell(rowx, colx)
        return excel_value_to_python_value(value=cell.value,
                                           typ=cell.ctype,
                                           datemode=book.datemode)

    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(convert(0, 0), 'date')
    self.assertEqual(convert(0, 1), datetime(year=2013, month=4, day=1))
    self.assertEqual(convert(1, 1), True)
    self.assertEqual(convert(2, 1), 1)
    self.assertEqual(convert(3, 1), 1.1)

    # NaN never compares equal to itself, so an equality assertion cannot
    # be used here; check identity with np.nan instead.
    self.assertIs(convert(4, 1), np.nan)
289+
263290
def test_excel_read_buffer(self):
264291
_skip_if_no_xlrd()
265292
_skip_if_no_openpyxl()

0 commit comments

Comments
 (0)