Skip to content

Commit 1296ab3

Browse files
kordek authored and jreback committed
BUG: Return non-dates in xls date cells directly
Closes pandas-dev#10001. Closes pandas-dev#13042. If a date column in an Excel file contains cells with very large integers that cause an int/long overflow when parsed as dates, the cell value is now returned unconverted (as an int or float) instead of raising an exception.
1 parent 2cd1480 commit 1296ab3

File tree

6 files changed

+23
-4
lines changed

6 files changed

+23
-4
lines changed

doc/source/whatsnew/v0.18.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ API changes
3838
~~~~~~~~~~~
3939

4040

41+
- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`)
4142

4243

4344

pandas/io/excel.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -329,11 +329,15 @@ def _parse_cell(cell_contents, cell_typ):
329329
appropriate object"""
330330

331331
if cell_typ == XL_CELL_DATE:
332+
332333
if xlrd_0_9_3:
333334
# Use the newer xlrd datetime handling.
334-
cell_contents = xldate.xldate_as_datetime(cell_contents,
335-
epoch1904)
336-
335+
try:
336+
cell_contents = \
337+
xldate.xldate_as_datetime(cell_contents,
338+
epoch1904)
339+
except OverflowError:
340+
return cell_contents
337341
# Excel doesn't distinguish between dates and time,
338342
# so we treat dates on the epoch as times only.
339343
# Also, Excel supports 1900 and 1904 epochs.
@@ -346,7 +350,11 @@ def _parse_cell(cell_contents, cell_typ):
346350
cell_contents.microsecond)
347351
else:
348352
# Use the xlrd <= 0.9.2 date handling.
349-
dt = xldate.xldate_as_tuple(cell_contents, epoch1904)
353+
try:
354+
dt = xldate.xldate_as_tuple(cell_contents, epoch1904)
355+
356+
except xldate.XLDateTooLarge:
357+
return cell_contents
350358

351359
if dt[0] < MINYEAR:
352360
cell_contents = time(*dt[3:])
19 KB
Binary file not shown.
9.15 KB
Binary file not shown.
9.13 KB
Binary file not shown.

pandas/io/tests/test_excel.py

+10
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,16 @@ def test_set_column_names_in_parameter(self):
481481
tm.assert_frame_equal(xlsdf_no_head, refdf)
482482
tm.assert_frame_equal(xlsdf_with_head, refdf)
483483

484+
def test_date_conversion_overflow(self):
485+
# GH 10001 : pandas.ExcelFile ignore parse_dates=False
486+
expected = pd.DataFrame([[pd.Timestamp('2016-03-12'), 'Marc Johnson'],
487+
[pd.Timestamp('2016-03-16'), 'Jack Black'],
488+
[1e+20, 'Timothy Brown']],
489+
columns=['DateColWithBigInt', 'StringCol'])
490+
491+
result = self.get_exceldf('testdateoverflow')
492+
tm.assert_frame_equal(result, expected)
493+
484494

485495
class XlrdTests(ReadingTestsBase):
486496
"""

0 commit comments

Comments
 (0)