@@ -1271,7 +1271,7 @@ def test_wrong_dict_value(datapath, parser):
1271
1271
read_xml (filename , parser = parser , iterparse = {"book" : "category" })
1272
1272
1273
1273
1274
- def test_bad_xml (datapath , parser ):
1274
+ def test_bad_xml (parser ):
1275
1275
bad_xml = """\
1276
1276
<?xml version='1.0' encoding='utf-8'?>
1277
1277
<row>
@@ -1312,6 +1312,113 @@ def test_bad_xml(datapath, parser):
1312
1312
)
1313
1313
1314
1314
1315
def test_comment(parser):
    """Check that XML comments do not leak into the parsed frame.

    The document carries comments before the root, directly inside the
    root, inside a child element, and after the root. Both the XPath
    reader and the iterparse helper are expected to yield only the
    ``name`` and ``type`` values of the two ``shape`` elements.
    """
    # Local import keeps this block self-contained regardless of what the
    # module already imports at the top of the file.
    from io import StringIO

    xml = """\
<!-- comment before root -->
<shapes>
<!-- comment within root -->
<shape>
<name>circle</name>
<type>2D</type>
</shape>
<shape>
<name>sphere</name>
<type>3D</type>
<!-- comment within child -->
</shape>
<!-- comment within root -->
</shapes>
<!-- comment after root -->"""

    # Passing a literal XML string to read_xml is deprecated in newer pandas
    # releases; a StringIO buffer is accepted by old and new versions alike.
    df_xpath = read_xml(StringIO(xml), xpath=".//shape", parser=parser)

    # read_xml_iterparse is a helper defined elsewhere in this module; it is
    # handed the raw string exactly as before.
    df_iter = read_xml_iterparse(
        xml, parser=parser, iterparse={"shape": ["name", "type"]}
    )

    df_expected = DataFrame(
        {
            "name": ["circle", "sphere"],
            "type": ["2D", "3D"],
        }
    )

    tm.assert_frame_equal(df_xpath, df_expected)
    tm.assert_frame_equal(df_iter, df_expected)
1348
+
1349
+
1350
def test_dtd(parser):
    """Check that an inline DOCTYPE declaration does not disturb parsing.

    The document starts with an XML declaration followed by an internal
    DTD subset. Both the XPath reader and the iterparse helper are
    expected to yield only the ``name`` and ``type`` values of the two
    ``shape`` elements.
    """
    # Local import keeps this block self-contained regardless of what the
    # module already imports at the top of the file.
    from io import StringIO

    # NOTE(review): the DOCTYPE name ("non-profits") differs from the root
    # element ("shapes") and no <!ELEMENT type ...> is declared, so the
    # document is not valid against its own DTD. Presumably neither parser
    # validates here, which would make this harmless -- confirm before
    # relying on it.
    xml = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE non-profits [
<!ELEMENT shapes (shape*) >
<!ELEMENT shape ( name, type )>
<!ELEMENT name (#PCDATA)>
]>
<shapes>
<shape>
<name>circle</name>
<type>2D</type>
</shape>
<shape>
<name>sphere</name>
<type>3D</type>
</shape>
</shapes>"""

    # Passing a literal XML string to read_xml is deprecated in newer pandas
    # releases; a StringIO buffer is accepted by old and new versions alike.
    df_xpath = read_xml(StringIO(xml), xpath=".//shape", parser=parser)

    # read_xml_iterparse is a helper defined elsewhere in this module; it is
    # handed the raw string exactly as before.
    df_iter = read_xml_iterparse(
        xml, parser=parser, iterparse={"shape": ["name", "type"]}
    )

    df_expected = DataFrame(
        {
            "name": ["circle", "sphere"],
            "type": ["2D", "3D"],
        }
    )

    tm.assert_frame_equal(df_xpath, df_expected)
    tm.assert_frame_equal(df_iter, df_expected)
1384
+
1385
+
1386
def test_processing_instruction(parser):
    """Check that processing instructions ahead of the root are ignored.

    The document places several processing instructions (a stylesheet
    reference and a few custom targets, one of which contains angle
    brackets) between the XML declaration and the root element. Both the
    XPath reader and the iterparse helper are expected to yield only the
    ``name`` and ``type`` values of the two ``shape`` elements.
    """
    # Local import keeps this block self-contained regardless of what the
    # module already imports at the top of the file.
    from io import StringIO

    xml = """\
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="style.xsl"?>
<?display table-view?>
<?sort alpha-ascending?>
<?textinfo whitespace is allowed ?>
<?elementnames <shape>, <name>, <type> ?>
<shapes>
<shape>
<name>circle</name>
<type>2D</type>
</shape>
<shape>
<name>sphere</name>
<type>3D</type>
</shape>
</shapes>"""

    # Passing a literal XML string to read_xml is deprecated in newer pandas
    # releases; a StringIO buffer is accepted by old and new versions alike.
    df_xpath = read_xml(StringIO(xml), xpath=".//shape", parser=parser)

    # read_xml_iterparse is a helper defined elsewhere in this module; it is
    # handed the raw string exactly as before.
    df_iter = read_xml_iterparse(
        xml, parser=parser, iterparse={"shape": ["name", "type"]}
    )

    df_expected = DataFrame(
        {
            "name": ["circle", "sphere"],
            "type": ["2D", "3D"],
        }
    )

    tm.assert_frame_equal(df_xpath, df_expected)
    tm.assert_frame_equal(df_iter, df_expected)
1420
+
1421
+
1315
1422
def test_no_result (datapath , parser ):
1316
1423
filename = datapath ("io" , "data" , "xml" , "books.xml" )
1317
1424
with pytest .raises (
0 commit comments