@@ -111,58 +111,6 @@ def spam_data(self, datapath):
111
111
# Returns whatever the `datapath` argument yields for the components
# "io", "data", "html", "banklist.html".
# NOTE(review): presumably the on-disk path of the banklist HTML sample --
# confirm against the `datapath` fixture, which is not visible here.
# The bare numerals between statements are old/new line numbers left by the
# diff viewer; they are not part of the code.
def banklist_data (self , datapath ):
112
112
return datapath ("io" , "data" , "html" , "banklist.html" )
113
113
114
# REMOVED by this diff (every line carries the "-" marker).
# Fixture that returned an HTML table literal: a header row with three <th>
# cells (only the last wraps its text in a link), one body row with three <td>
# cells (first two contain links, the third is plain text), and a <tfoot> row
# with two <td> cells, the second holding two links.
# The same literal is re-added as a local variable inside test_extract_links
# later in this diff.
- @pytest .fixture
115
- def gh_13141_data (self ):
116
- return """
117
- <table>
118
- <tr>
119
- <th>HTTP</th>
120
- <th>FTP</th>
121
- <th><a href="https://en.wiktionary.org/wiki/linkless">Linkless</a></th>
122
- </tr>
123
- <tr>
124
- <td><a href="https://en.wikipedia.org/">Wikipedia</a></td>
125
- <td>SURROUNDING <a href="ftp://ftp.us.debian.org/">Debian</a> TEXT</td>
126
- <td>Linkless</td>
127
- </tr>
128
- <tfoot>
129
- <tr>
130
- <td><a href="https://en.wikipedia.org/wiki/Page_footer">Footer</a></td>
131
- <td>
132
- Multiple <a href="1">links:</a> <a href="2">Only first captured.</a>
133
- </td>
134
- </tr>
135
- </tfoot>
136
- </table>
137
- """
138
-
139
# REMOVED by this diff (every line carries the "-" marker).
# Fixture that returned a dict of expected cell values for the GH 13141 HTML
# table, keyed "<section>_ignore" / "<section>_extract" for head, body, footer.
# "*_ignore" lists hold the cell text only; "*_extract" lists hold
# (text, href) tuples, with None as the href for cells that have no link.
# For the footer cell with two links the expected href is "1", i.e. the first.
# NOTE(review): the trailing None in both footer lists presumably stands for
# the missing third footer cell (the <tfoot> row has only two <td>) -- confirm.
# The same dict is re-added as a local variable inside test_extract_links
# later in this diff.
- @pytest .fixture
140
- def gh_13141_expected (self ):
141
- return {
142
- "head_ignore" : ["HTTP" , "FTP" , "Linkless" ],
143
- "head_extract" : [
144
- ("HTTP" , None ),
145
- ("FTP" , None ),
146
- ("Linkless" , "https://en.wiktionary.org/wiki/linkless" ),
147
- ],
148
- "body_ignore" : ["Wikipedia" , "SURROUNDING Debian TEXT" , "Linkless" ],
149
- "body_extract" : [
150
- ("Wikipedia" , "https://en.wikipedia.org/" ),
151
- ("SURROUNDING Debian TEXT" , "ftp://ftp.us.debian.org/" ),
152
- ("Linkless" , None ),
153
- ],
154
- "footer_ignore" : [
155
- "Footer" ,
156
- "Multiple links: Only first captured." ,
157
- None ,
158
- ],
159
- "footer_extract" : [
160
- ("Footer" , "https://en.wikipedia.org/wiki/Page_footer" ),
161
- ("Multiple links: Only first captured." , "1" ),
162
- None ,
163
- ],
164
- }
165
-
166
114
# Autouse, function-scoped fixture (unchanged context in this diff).
# Stores on the test instance a `read_html` callable with the `flavor`
# argument pre-bound via functools.partial, so tests can call
# `self.read_html(...)` without repeating the flavor.
# NOTE(review): `flavor` is presumably a parametrized fixture naming the
# parser backend -- it is defined outside this view; confirm.
@pytest .fixture (autouse = True , scope = "function" )
167
115
def set_defaults (self , flavor ):
168
116
self .read_html = partial (read_html , flavor = flavor )
@@ -1394,7 +1342,55 @@ def test_parse_br_as_space(self):
1394
1342
tm .assert_frame_equal (result , expected )
1395
1343
1396
1344
@pytest .mark .parametrize ("arg" , ["all" , "body" , "header" , "footer" ])
1397
- def test_extract_links (self , gh_13141_data , gh_13141_expected , arg ):
1345
+ def test_extract_links (self , arg ):
1346
+ gh_13141_data = """
1347
+ <table>
1348
+ <tr>
1349
+ <th>HTTP</th>
1350
+ <th>FTP</th>
1351
+ <th><a href="https://en.wiktionary.org/wiki/linkless">Linkless</a></th>
1352
+ </tr>
1353
+ <tr>
1354
+ <td><a href="https://en.wikipedia.org/">Wikipedia</a></td>
1355
+ <td>SURROUNDING <a href="ftp://ftp.us.debian.org/">Debian</a> TEXT</td>
1356
+ <td>Linkless</td>
1357
+ </tr>
1358
+ <tfoot>
1359
+ <tr>
1360
+ <td><a href="https://en.wikipedia.org/wiki/Page_footer">Footer</a></td>
1361
+ <td>
1362
+ Multiple <a href="1">links:</a> <a href="2">Only first captured.</a>
1363
+ </td>
1364
+ </tr>
1365
+ </tfoot>
1366
+ </table>
1367
+ """
1368
+
1369
+ gh_13141_expected = {
1370
+ "head_ignore" : ["HTTP" , "FTP" , "Linkless" ],
1371
+ "head_extract" : [
1372
+ ("HTTP" , None ),
1373
+ ("FTP" , None ),
1374
+ ("Linkless" , "https://en.wiktionary.org/wiki/linkless" ),
1375
+ ],
1376
+ "body_ignore" : ["Wikipedia" , "SURROUNDING Debian TEXT" , "Linkless" ],
1377
+ "body_extract" : [
1378
+ ("Wikipedia" , "https://en.wikipedia.org/" ),
1379
+ ("SURROUNDING Debian TEXT" , "ftp://ftp.us.debian.org/" ),
1380
+ ("Linkless" , None ),
1381
+ ],
1382
+ "footer_ignore" : [
1383
+ "Footer" ,
1384
+ "Multiple links: Only first captured." ,
1385
+ None ,
1386
+ ],
1387
+ "footer_extract" : [
1388
+ ("Footer" , "https://en.wikipedia.org/wiki/Page_footer" ),
1389
+ ("Multiple links: Only first captured." , "1" ),
1390
+ None ,
1391
+ ],
1392
+ }
1393
+
1398
1394
data_exp = gh_13141_expected ["body_ignore" ]
1399
1395
foot_exp = gh_13141_expected ["footer_ignore" ]
1400
1396
head_exp = gh_13141_expected ["head_ignore" ]
@@ -1413,10 +1409,10 @@ def test_extract_links(self, gh_13141_data, gh_13141_expected, arg):
1413
1409
expected = DataFrame ([data_exp , foot_exp ], columns = head_exp )
1414
1410
tm .assert_frame_equal (result , expected )
1415
1411
1416
# Checks that read_html raises ValueError with the message below when
# `extract_links` is given the unsupported value "incorrect".
# This diff swaps the input fixture from gh_13141_data to spam_data ("-" lines
# are the old signature/call, "+" lines the new ones); the gh_13141_data
# fixture itself is removed earlier in the same diff.
# Note the call uses the module-level read_html, not self.read_html.
- def test_extract_links_bad (self , gh_13141_data ):
1412
+ def test_extract_links_bad (self , spam_data ):
1417
1413
msg = (
1418
1414
"`extract_links` must be one of "
1419
1415
'{None, "header", "footer", "body", "all"}, got "incorrect"'
1420
1416
)
1421
1417
with pytest .raises (ValueError , match = msg ):
1422
- read_html (gh_13141_data , extract_links = "incorrect" )
1418
+ read_html (spam_data , extract_links = "incorrect" )
0 commit comments