diff --git a/doc/source/release.rst b/doc/source/release.rst index a2b525a737879..275717ef984ea 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -44,7 +44,8 @@ pandas 0.13 - Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf", "iNf", etc.) to infinity. (:issue:`4220`, :issue:`4219`), affecting ``read_table``, ``read_csv``, etc. - + - Factored out excel_value_to_python_value from ExcelFile::_parse_excel (:issue:`4589`) + **API Changes** - ``pandas`` now is Python 2/3 compatible without the need for 2to3 thanks to diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 534a88e303dbf..aa89264cfc695 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -53,6 +53,23 @@ def read_excel(path_or_buf, sheetname, kind=None, **kwds): return ExcelFile(path_or_buf, kind=kind).parse(sheetname=sheetname, kind=kind, **kwds) +def excel_value_to_python_value(value, typ, datemode): + from xlrd import (xldate_as_tuple, XL_CELL_DATE, + XL_CELL_ERROR, XL_CELL_BOOLEAN) + + if typ == XL_CELL_DATE: + dt = xldate_as_tuple(value, datemode) + # how to produce this first case? + if dt[0] < datetime.MINYEAR: # pragma: no cover + value = datetime.time(*dt[3:]) + else: + value = datetime.datetime(*dt) + elif typ == XL_CELL_ERROR: + value = np.nan + elif typ == XL_CELL_BOOLEAN: + value = bool(value) + + return value class ExcelFile(object): """ @@ -174,8 +191,6 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, index_col=None, has_index_names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None, **kwds): - from xlrd import (xldate_as_tuple, XL_CELL_DATE, - XL_CELL_ERROR, XL_CELL_BOOLEAN) datemode = self.book.datemode if isinstance(sheetname, compat.string_types): @@ -193,17 +208,7 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, should_parse[j] = self._should_parse(j, parse_cols) if parse_cols is None or should_parse[j]: - if typ == XL_CELL_DATE: - dt = xldate_as_tuple(value, datemode) - # how to produce this first case? - if dt[0] < datetime.MINYEAR: # pragma: no cover - value = datetime.time(*dt[3:]) - else: - value = datetime.datetime(*dt) - elif typ == XL_CELL_ERROR: - value = np.nan - elif typ == XL_CELL_BOOLEAN: - value = bool(value) + value = excel_value_to_python_value(value=value, typ=typ, datemode=datemode) row.append(value) data.append(row) diff --git a/pandas/io/tests/data/types.xls b/pandas/io/tests/data/types.xls new file mode 100755 index 0000000000000..232a88609df92 Binary files /dev/null and b/pandas/io/tests/data/types.xls differ diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 1ac4d4e31ed10..f5db9d0c016af 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -18,7 +18,8 @@ import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, TextParser, TextFileReader) -from pandas.io.excel import ExcelFile, ExcelWriter, read_excel +from pandas.io.excel import ExcelFile, ExcelWriter, read_excel,\ + excel_value_to_python_value from pandas.util.testing import (assert_almost_equal, assert_series_equal, network, @@ -260,6 +261,32 @@ def test_excel_table(self): tm.assert_frame_equal(df4, df.ix[:-1]) tm.assert_frame_equal(df4, df5) + def test_excel_value_to_python_value(self): + _skip_if_no_xlrd() + + pth = os.path.join(self.dirpath, 'types.xls') + xls = ExcelFile(pth) + book = xls.book + sheet = book.sheet_by_index(0) + + cell = sheet.cell(0, 0) + self.assertEquals(excel_value_to_python_value(value=cell.value, typ=cell.ctype, datemode=book.datemode), 'date') + + cell = sheet.cell(0, 1) + self.assertEquals(excel_value_to_python_value(value=cell.value, typ=cell.ctype, datemode=book.datemode), datetime(year=2013, month=4, day=1)) + + cell = sheet.cell(1, 1) + self.assertEquals(excel_value_to_python_value(value=cell.value, typ=cell.ctype, datemode=book.datemode), True) + + cell = sheet.cell(2, 1) + self.assertEquals(excel_value_to_python_value(value=cell.value, typ=cell.ctype, datemode=book.datemode), 1) + + cell = sheet.cell(3, 1) + self.assertEquals(excel_value_to_python_value(value=cell.value, typ=cell.ctype, datemode=book.datemode), 1.1) + + cell = sheet.cell(4, 1) + self.assertIs(excel_value_to_python_value(value=cell.value, typ=cell.ctype, datemode=book.datemode), np.nan) #We need to use is here because value is NaN + def test_excel_read_buffer(self): _skip_if_no_xlrd() _skip_if_no_openpyxl()