22
22
marks = [
23
23
td .skip_if_no ("xlrd" ),
24
24
pytest .mark .filterwarnings ("ignore:.*(tree\\ .iter|html argument)" ),
25
+ pytest .mark .filterwarnings (
26
+ 'ignore:The Excel reader engine "xlrd" is deprecated,'
27
+ ),
25
28
],
26
29
),
27
30
pytest .param (
34
37
pytest .param (
35
38
None ,
36
39
marks = [
37
- td .skip_if_no ("xlrd " ),
38
- pytest .mark .filterwarnings ("ignore:.*(tree \\ .iter| html argument) " ),
40
+ td .skip_if_no ("openpyxl " ),
41
+ pytest .mark .filterwarnings ("ignore:.*html argument" ),
39
42
],
40
43
),
41
44
pytest .param ("pyxlsb" , marks = td .skip_if_no ("pyxlsb" )),
@@ -51,6 +54,8 @@ def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool:
51
54
engine = engine .values [0 ]
52
55
if engine == "openpyxl" and read_ext == ".xls" :
53
56
return False
57
+ if engine is None and read_ext == ".xls" :
58
+ return False
54
59
if engine == "odf" and read_ext != ".ods" :
55
60
return False
56
61
if read_ext == ".ods" and engine != "odf" :
@@ -559,7 +564,7 @@ def test_date_conversion_overflow(self, read_ext):
559
564
columns = ["DateColWithBigInt" , "StringCol" ],
560
565
)
561
566
562
- if pd .read_excel .keywords ["engine" ] == "openpyxl" :
567
+ if pd .read_excel .keywords ["engine" ] in [ "openpyxl" , None ] :
563
568
pytest .xfail ("Maybe not supported by openpyxl" )
564
569
565
570
result = pd .read_excel ("testdateoverflow" + read_ext )
@@ -942,7 +947,10 @@ def test_read_excel_squeeze(self, read_ext):
942
947
expected = pd .Series ([1 , 2 , 3 ], name = "a" )
943
948
tm .assert_series_equal (actual , expected )
944
949
945
- def test_deprecated_kwargs (self , read_ext ):
950
+ def test_deprecated_kwargs (self , engine , read_ext ):
951
+ if engine == "xlrd" :
952
+ pytest .skip ("Use of xlrd engine produces a FutureWarning as well" )
953
+
946
954
with tm .assert_produces_warning (FutureWarning , raise_on_extra_warnings = False ):
947
955
pd .read_excel ("test1" + read_ext , "Sheet1" , 0 )
948
956
@@ -961,6 +969,19 @@ def test_no_header_with_list_index_col(self, read_ext):
961
969
)
962
970
tm .assert_frame_equal (expected , result )
963
971
972
+ def test_excel_high_surrogate (self , engine , read_ext ):
973
+ # GH 23809
974
+ if read_ext != ".xlsx" :
975
+ pytest .skip ("Test is only applicable to .xlsx file" )
976
+ if engine in ["openpyxl" , None ]:
977
+ pytest .skip ("Test does not work for openpyxl" )
978
+
979
+ expected = pd .DataFrame (["\udc88 " ], columns = ["Column1" ])
980
+
981
+ # should not produce a segmentation violation
982
+ actual = pd .read_excel ("high_surrogate.xlsx" )
983
+ tm .assert_frame_equal (expected , actual )
984
+
964
985
965
986
class TestExcelFileRead :
966
987
@pytest .fixture (autouse = True )
@@ -1116,14 +1137,6 @@ def test_excel_read_binary(self, engine, read_ext):
1116
1137
actual = pd .read_excel (data , engine = engine )
1117
1138
tm .assert_frame_equal (expected , actual )
1118
1139
1119
- def test_excel_high_surrogate (self , engine ):
1120
- # GH 23809
1121
- expected = pd .DataFrame (["\udc88 " ], columns = ["Column1" ])
1122
-
1123
- # should not produce a segmentation violation
1124
- actual = pd .read_excel ("high_surrogate.xlsx" )
1125
- tm .assert_frame_equal (expected , actual )
1126
-
1127
1140
@pytest .mark .parametrize ("filename" , ["df_empty.xlsx" , "df_equals.xlsx" ])
1128
1141
def test_header_with_index_col (self , engine , filename ):
1129
1142
# GH 33476
0 commit comments