Skip to content

Commit a5cab86

Browse files
committed
Merge pull request #6934 from jmcnamara/bug_decimal_seconds
BUG: Fix to read decimal seconds from Excel.
2 parents 47c740d + 01b3399 commit a5cab86

File tree

5 files changed

+86
-14
lines changed

5 files changed

+86
-14
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ Improvements to existing features
285285
- Translate ``sep='\s+'`` to ``delim_whitespace=True`` in
286286
:func:`read_csv`/:func:`read_table` if no other C-unsupported options
287287
specified (:issue:`6607`)
288+
- ``read_excel`` can now read milliseconds in Excel dates and times with xlrd >= 0.9.3 (:issue:`5945`)
288289

289290
.. _release.bug_fixes-0.14.0:
290291

pandas/io/excel.py

+36-9
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import pandas.compat as compat
1919
import pandas.core.common as com
2020
from warnings import warn
21+
from distutils.version import LooseVersion
2122

2223
__all__ = ["read_excel", "ExcelWriter", "ExcelFile"]
2324

@@ -250,11 +251,19 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
250251
parse_dates=False, date_parser=None, na_values=None,
251252
thousands=None, chunksize=None, convert_float=True,
252253
**kwds):
253-
from xlrd import (xldate_as_tuple, XL_CELL_DATE,
254+
import xlrd
255+
from xlrd import (xldate, XL_CELL_DATE,
254256
XL_CELL_ERROR, XL_CELL_BOOLEAN,
255257
XL_CELL_NUMBER)
256258

257-
datemode = self.book.datemode
259+
epoch1904 = self.book.datemode
260+
261+
# xlrd >= 0.9.3 can return datetime objects directly.
262+
if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
263+
xlrd_0_9_3 = True
264+
else:
265+
xlrd_0_9_3 = False
266+
258267
if isinstance(sheetname, compat.string_types):
259268
sheet = self.book.sheet_by_name(sheetname)
260269
else: # assume an integer if not a string
@@ -271,12 +280,29 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
271280

272281
if parse_cols is None or should_parse[j]:
273282
if typ == XL_CELL_DATE:
274-
dt = xldate_as_tuple(value, datemode)
275-
# how to produce this first case?
276-
if dt[0] < datetime.MINYEAR: # pragma: no cover
277-
value = datetime.time(*dt[3:])
283+
if xlrd_0_9_3:
284+
# Use the newer xlrd datetime handling.
285+
value = xldate.xldate_as_datetime(value, epoch1904)
286+
287+
# Excel doesn't distinguish between dates and time,
288+
# so we treat dates on the epoch as times only.
289+
# Also, Excel supports 1900 and 1904 epochs.
290+
year = (value.timetuple())[0:3]
291+
if ((not epoch1904 and year == (1899, 12, 31))
292+
or (epoch1904 and year == (1904, 1, 1))):
293+
value = datetime.time(value.hour,
294+
value.minute,
295+
value.second,
296+
value.microsecond)
278297
else:
279-
value = datetime.datetime(*dt)
298+
# Use the xlrd <= 0.9.2 date handling.
299+
dt = xldate.xldate_as_tuple(value, epoch1904)
300+
301+
if dt[0] < datetime.MINYEAR:
302+
value = datetime.time(*dt[3:])
303+
else:
304+
value = datetime.datetime(*dt)
305+
280306
elif typ == XL_CELL_ERROR:
281307
value = np.nan
282308
elif typ == XL_CELL_BOOLEAN:
@@ -727,8 +753,9 @@ def __init__(self, path, engine=None,
727753
import xlsxwriter
728754

729755
super(_XlsxWriter, self).__init__(path, engine=engine,
730-
date_format=date_format, datetime_format=datetime_format,
731-
**engine_kwargs)
756+
date_format=date_format,
757+
datetime_format=datetime_format,
758+
**engine_kwargs)
732759

733760
self.book = xlsxwriter.Workbook(path, **engine_kwargs)
734761

pandas/io/tests/data/times_1900.xls

16 KB
Binary file not shown.

pandas/io/tests/data/times_1904.xls

16 KB
Binary file not shown.

pandas/io/tests/test_excel.py

+49-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# pylint: disable=E1101
22

33
from pandas.compat import u, range, map
4-
from datetime import datetime, date
4+
from datetime import datetime, date, time
55
import os
6+
from distutils.version import LooseVersion
67

78
import nose
89

@@ -360,6 +361,49 @@ def test_reader_special_dtypes(self):
360361
convert_float=False)
361362
tm.assert_frame_equal(actual, no_convert_float)
362363

364+
def test_reader_seconds(self):
365+
# Test reading times with and without milliseconds. GH5945.
366+
_skip_if_no_xlrd()
367+
import xlrd
368+
369+
if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
370+
# Xlrd >= 0.9.3 can handle Excel milliseconds.
371+
expected = DataFrame.from_items([("Time",
372+
[time(1, 2, 3),
373+
time(2, 45, 56, 100000),
374+
time(4, 29, 49, 200000),
375+
time(6, 13, 42, 300000),
376+
time(7, 57, 35, 400000),
377+
time(9, 41, 28, 500000),
378+
time(11, 25, 21, 600000),
379+
time(13, 9, 14, 700000),
380+
time(14, 53, 7, 800000),
381+
time(16, 37, 0, 900000),
382+
time(18, 20, 54)])])
383+
else:
384+
# Xlrd < 0.9.3 rounds fractional seconds to the nearest whole second.
385+
expected = DataFrame.from_items([("Time",
386+
[time(1, 2, 3),
387+
time(2, 45, 56),
388+
time(4, 29, 49),
389+
time(6, 13, 42),
390+
time(7, 57, 35),
391+
time(9, 41, 29),
392+
time(11, 25, 22),
393+
time(13, 9, 15),
394+
time(14, 53, 8),
395+
time(16, 37, 1),
396+
time(18, 20, 54)])])
397+
398+
epoch_1900 = os.path.join(self.dirpath, 'times_1900.xls')
399+
epoch_1904 = os.path.join(self.dirpath, 'times_1904.xls')
400+
401+
actual = read_excel(epoch_1900, 'Sheet1')
402+
tm.assert_frame_equal(actual, expected)
403+
404+
actual = read_excel(epoch_1904, 'Sheet1')
405+
tm.assert_frame_equal(actual, expected)
406+
363407

364408
class ExcelWriterBase(SharedItems):
365409
# Base class for test cases to run with different Excel writers.
@@ -400,7 +444,7 @@ def test_excel_deprecated_options(self):
400444
with ensure_clean(self.ext) as path:
401445
with tm.assert_produces_warning(FutureWarning):
402446
self.frame.to_excel(path, 'test1', cols=['A', 'B'])
403-
447+
404448
with tm.assert_produces_warning(False):
405449
self.frame.to_excel(path, 'test1', columns=['A', 'B'])
406450

@@ -832,9 +876,9 @@ def test_to_excel_output_encoding(self):
832876
index=[u('A\u0192'), 'B'], columns=[u('X\u0193'), 'Y', 'Z'])
833877

834878
with ensure_clean(filename) as filename:
835-
df.to_excel(filename, sheet_name = 'TestSheet', encoding='utf8')
836-
result = read_excel(filename, 'TestSheet', encoding = 'utf8')
837-
tm.assert_frame_equal(result,df)
879+
df.to_excel(filename, sheet_name='TestSheet', encoding='utf8')
880+
result = read_excel(filename, 'TestSheet', encoding='utf8')
881+
tm.assert_frame_equal(result, df)
838882

839883

840884
def test_to_excel_unicode_filename(self):

0 commit comments

Comments
 (0)