Skip to content

Commit 0b58c62

Browse files
authored
fix #54564 (#54567)
* added unit-test to highlight issue #54564
* fixed #54564
* added whatsnew entry
* anticipate this fix going into v2.2.0 instead of v2.1.0
* LBYL instead of EAFP
* address review request
* fix tests on windows
1 parent ade0483 commit 0b58c62

File tree

4 files changed

+19
-5
lines changed

4 files changed

+19
-5
lines changed

doc/source/whatsnew/v2.2.0.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,7 @@ MultiIndex
174174

175175
I/O
176176
^^^
177-
-
178-
-
177+
- Bug in :func:`read_excel` with ``engine="xlrd"`` (``xls`` files) raising an error when the file contains NaN or Inf values (:issue:`54564`)
179178

180179
Period
181180
^^^^^^

pandas/io/excel/_xlrd.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from datetime import time
4+
import math
45
from typing import TYPE_CHECKING
56

67
import numpy as np
@@ -120,9 +121,11 @@ def _parse_cell(cell_contents, cell_typ):
120121
elif cell_typ == XL_CELL_NUMBER:
121122
# GH5394 - Excel 'numbers' are always floats
122123
# it's a minimal perf hit and less surprising
123-
val = int(cell_contents)
124-
if val == cell_contents:
125-
cell_contents = val
124+
if math.isfinite(cell_contents):
125+
# GH54564 - don't attempt to convert NaN/Inf
126+
val = int(cell_contents)
127+
if val == cell_contents:
128+
cell_contents = val
126129
return cell_contents
127130

128131
data = []

pandas/tests/io/data/excel/test6.xls

5.5 KB
Binary file not shown.

pandas/tests/io/excel/test_xlrd.py

+12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import io
22

3+
import numpy as np
34
import pytest
45

56
import pandas as pd
@@ -44,6 +45,17 @@ def test_read_xlsx_fails(datapath):
4445
pd.read_excel(path, engine="xlrd")
4546

4647

48+
def test_nan_in_xls(datapath):
49+
# GH 54564
50+
path = datapath("io", "data", "excel", "test6.xls")
51+
52+
expected = pd.DataFrame({0: np.r_[0, 2].astype("int64"), 1: np.r_[1, np.nan]})
53+
54+
result = pd.read_excel(path, header=None)
55+
56+
tm.assert_frame_equal(result, expected)
57+
58+
4759
@pytest.mark.parametrize(
4860
"file_header",
4961
[

0 commit comments

Comments
 (0)