Skip to content

ENH Factored out excel_value_to_python_value from ExcelFile::_parse_exce... #4590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 26, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ pandas 0.13
- Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf",
"iNf", etc.) as infinity. (:issue:`4220`, :issue:`4219`), affecting
``read_table``, ``read_csv``, etc.

- Factored out ``excel_value_to_python_value`` from ``ExcelFile._parse_excel`` (:issue:`4589`)

**API Changes**

- ``pandas`` now is Python 2/3 compatible without the need for 2to3 thanks to
Expand Down
31 changes: 18 additions & 13 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,23 @@ def read_excel(path_or_buf, sheetname, kind=None, **kwds):
return ExcelFile(path_or_buf, kind=kind).parse(sheetname=sheetname,
kind=kind, **kwds)

def excel_value_to_python_value(value, typ, datemode):
    """
    Convert a raw xlrd cell value into the matching Python object.

    Parameters
    ----------
    value : object
        Raw cell value as handed back by xlrd.
    typ : int
        xlrd cell type code; compared against the ``XL_CELL_*`` constants.
    datemode : int
        Date mode of the workbook, passed straight to
        ``xlrd.xldate_as_tuple``.

    Returns
    -------
    object
        ``datetime.datetime`` (or ``datetime.time``, see below) for date
        cells, ``np.nan`` for error cells, ``bool`` for boolean cells, and
        ``value`` unchanged for every other cell type.
    """
    # xlrd is imported lazily so that it is only required when an Excel
    # value actually has to be converted.
    from xlrd import (XL_CELL_BOOLEAN, XL_CELL_DATE, XL_CELL_ERROR,
                      xldate_as_tuple)

    if typ == XL_CELL_DATE:
        parts = xldate_as_tuple(value, datemode)
        if parts[0] >= datetime.MINYEAR:
            return datetime.datetime(*parts)
        # Year below MINYEAR: no usable date part, keep only h/m/s.
        # NOTE(review): per the xlrd docs a serial in [0, 1) is treated as a
        # pure time and comes back as (0, 0, 0, h, m, s) -- confirm that a
        # time-only cell is what reaches this branch.
        return datetime.time(*parts[3:])  # pragma: no cover

    if typ == XL_CELL_ERROR:
        return np.nan

    if typ == XL_CELL_BOOLEAN:
        return bool(value)

    # Any other cell type is passed through untouched.
    return value

class ExcelFile(object):
"""
Expand Down Expand Up @@ -174,8 +191,6 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0,
index_col=None, has_index_names=None, parse_cols=None,
parse_dates=False, date_parser=None, na_values=None,
thousands=None, chunksize=None, **kwds):
from xlrd import (xldate_as_tuple, XL_CELL_DATE,
XL_CELL_ERROR, XL_CELL_BOOLEAN)

datemode = self.book.datemode
if isinstance(sheetname, compat.string_types):
Expand All @@ -193,17 +208,7 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0,
should_parse[j] = self._should_parse(j, parse_cols)

if parse_cols is None or should_parse[j]:
if typ == XL_CELL_DATE:
dt = xldate_as_tuple(value, datemode)
# how to produce this first case?
if dt[0] < datetime.MINYEAR: # pragma: no cover
value = datetime.time(*dt[3:])
else:
value = datetime.datetime(*dt)
elif typ == XL_CELL_ERROR:
value = np.nan
elif typ == XL_CELL_BOOLEAN:
value = bool(value)
value = excel_value_to_python_value(value=value, typ=typ, datemode=datemode)
row.append(value)

data.append(row)
Expand Down
Binary file added pandas/io/tests/data/types.xls
Binary file not shown.
29 changes: 28 additions & 1 deletion pandas/io/tests/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
import pandas.io.parsers as parsers
from pandas.io.parsers import (read_csv, read_table, read_fwf,
TextParser, TextFileReader)
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel,\
excel_value_to_python_value
from pandas.util.testing import (assert_almost_equal,
assert_series_equal,
network,
Expand Down Expand Up @@ -260,6 +261,32 @@ def test_excel_table(self):
tm.assert_frame_equal(df4, df.ix[:-1])
tm.assert_frame_equal(df4, df5)

def test_excel_value_to_python_value(self):
    """Check excel_value_to_python_value against cells of ``types.xls``.

    Each cell of the fixture's first sheet is read through xlrd and its
    converted value is compared with the Python object expected for it.
    """
    _skip_if_no_xlrd()

    pth = os.path.join(self.dirpath, 'types.xls')
    xls = ExcelFile(pth)
    book = xls.book
    sheet = book.sheet_by_index(0)

    def convert(rowx, colx):
        # Convert the cell at (rowx, colx) exactly as the parser would.
        cell = sheet.cell(rowx, colx)
        return excel_value_to_python_value(value=cell.value,
                                           typ=cell.ctype,
                                           datemode=book.datemode)

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which newer unittest versions no longer provide.
    self.assertEqual(convert(0, 0), 'date')
    self.assertEqual(convert(0, 1), datetime(year=2013, month=4, day=1))
    self.assertEqual(convert(1, 1), True)
    self.assertEqual(convert(2, 1), 1)
    self.assertEqual(convert(3, 1), 1.1)

    # NaN never compares equal to itself, so the check has to be on
    # identity with the np.nan object rather than on equality.
    # NOTE(review): presumably this fixture cell is an Excel error cell.
    self.assertIs(convert(4, 1), np.nan)

def test_excel_read_buffer(self):
_skip_if_no_xlrd()
_skip_if_no_openpyxl()
Expand Down