Skip to content

Commit 86a733f

Browse files
[XMLParser] Fix reentrancy issue around currentParser (#5061)
* [XMLParser] Use `TaskLocal` for storing the current parser Instead of thread-local storage, use `TaskLocal` to store the current parser. This solves three issues: 1. If someone calls `XMLParser.parse()` with a new parser instance in a delegate method call, it overwrote the current parser and wrote it back after the call as `nil`, not the previous current parser. This reentrancy issue can be a problem especially when someone uses external entity resolving since the feature depends on the current parser tracking. Using `TaskLocal` solves this issue since it tracks values as a stack and restores the previous value at the end of the `withValue` call. 2. Since jobs of different tasks can be scheduled on the same thread, different tasks can refer to the same thread-local storage. This wouldn't be a problem for now since the `parse()` method doesn't have any suspention points and different tasks can't run on the same thread during the parsing. However, it's better to use `TaskLocal` to leverage the concurrency model of Swift. 3. The global variable `_currentParser` existed in the WASI platform path but it's unsafe in the Swift concurrency model. It wouldn't be a problem on WASI since it's always single-threaded, we should avoid platform-specific assumption as much as possible. * Remove unnecessary `#if os(WASI)` condition in XMLParser.swift * Keep the current parser in TLS instead of TaskLocal TaskLocal storage is inherited by non-detached child tasks, which can lead to the parser being shared between tasks. This is not our intention and can lead to inconsistent state. Instead, we should keep the current parser in thread-local storage. This should be safe as long as we don't have any structured suspension points in `withCurrentParser` block. * Simplify the current parser context management
1 parent a12c7d1 commit 86a733f

File tree

2 files changed

+73
-59
lines changed

2 files changed

+73
-59
lines changed

Sources/FoundationXML/XMLParser.swift

+31-58
Original file line numberDiff line numberDiff line change
@@ -398,9 +398,7 @@ extension XMLParser : @unchecked Sendable { }
398398

399399
open class XMLParser : NSObject {
400400
private var _handler: _CFXMLInterfaceSAXHandler
401-
#if !os(WASI)
402401
internal var _stream: InputStream?
403-
#endif
404402
internal var _data: Data?
405403

406404
internal var _chunkSize = Int(4096 * 32) // a suitably large number for a decent chunk size
@@ -414,9 +412,6 @@ open class XMLParser : NSObject {
414412

415413
// initializes the parser with the specified URL.
416414
public convenience init?(contentsOf url: URL) {
417-
#if os(WASI)
418-
return nil
419-
#else
420415
setupXMLParsing()
421416
if url.isFileURL {
422417
if let stream = InputStream(url: url) {
@@ -434,7 +429,6 @@ open class XMLParser : NSObject {
434429
return nil
435430
}
436431
}
437-
#endif
438432
}
439433

440434
// create the parser from data
@@ -450,15 +444,13 @@ open class XMLParser : NSObject {
450444
_CFXMLInterfaceDestroyContext(_parserContext)
451445
}
452446

453-
#if !os(WASI)
454447
//create a parser that incrementally pulls data from the specified stream and parses it.
455448
public init(stream: InputStream) {
456449
setupXMLParsing()
457450
_stream = stream
458451
_handler = _CFXMLInterfaceCreateSAXHandler()
459452
_parserContext = nil
460453
}
461-
#endif
462454

463455
open weak var delegate: XMLParserDelegate?
464456

@@ -469,33 +461,35 @@ open class XMLParser : NSObject {
469461
open var externalEntityResolvingPolicy: ExternalEntityResolvingPolicy = .never
470462

471463
open var allowedExternalEntityURLs: Set<URL>?
472-
473-
#if os(WASI)
474-
private static var _currentParser: XMLParser?
475-
#endif
476464

465+
#if os(WASI)
466+
/// The current parser associated with the current thread. (assuming no multi-threading)
467+
/// FIXME: Unify the implementation with the other platforms once we unlock `threadDictionary`
468+
/// or migrate to `FoundationEssentials._ThreadLocal`.
469+
private static nonisolated(unsafe) var _currentParser: XMLParser? = nil
470+
#else
471+
/// The current parser associated with the current thread.
472+
private static var _currentParser: XMLParser? {
473+
get {
474+
return Thread.current.threadDictionary["__CurrentNSXMLParser"] as? XMLParser
475+
}
476+
set {
477+
Thread.current.threadDictionary["__CurrentNSXMLParser"] = newValue
478+
}
479+
}
480+
#endif
481+
482+
/// The current parser associated with the current thread.
477483
internal static func currentParser() -> XMLParser? {
478-
#if os(WASI)
479484
return _currentParser
480-
#else
481-
if let current = Thread.current.threadDictionary["__CurrentNSXMLParser"] {
482-
return current as? XMLParser
483-
} else {
484-
return nil
485-
}
486-
#endif
487485
}
488-
489-
internal static func setCurrentParser(_ parser: XMLParser?) {
490-
#if os(WASI)
486+
487+
/// Execute the given closure with the current parser set to the given parser.
488+
internal static func withCurrentParser<R>(_ parser: XMLParser, _ body: () -> R) -> R {
489+
let oldParser = _currentParser
491490
_currentParser = parser
492-
#else
493-
if let p = parser {
494-
Thread.current.threadDictionary["__CurrentNSXMLParser"] = p
495-
} else {
496-
Thread.current.threadDictionary.removeObject(forKey: "__CurrentNSXMLParser")
497-
}
498-
#endif
491+
defer { _currentParser = oldParser }
492+
return body()
499493
}
500494

501495
internal func _handleParseResult(_ parseResult: Int32) -> Bool {
@@ -569,7 +563,6 @@ open class XMLParser : NSObject {
569563
return result
570564
}
571565

572-
#if !os(WASI)
573566
internal func parseFrom(_ stream : InputStream) -> Bool {
574567
var result = true
575568

@@ -598,37 +591,17 @@ open class XMLParser : NSObject {
598591

599592
return result
600593
}
601-
#else
602-
internal func parse(from data: Data) -> Bool {
603-
var result = true
604-
var chunkStart = 0
605-
var chunkEnd = min(_chunkSize, data.count)
606-
while result && chunkStart < chunkEnd {
607-
let chunk = data[chunkStart..<chunkEnd]
608-
result = parseData(chunk)
609-
chunkStart = chunkEnd
610-
chunkEnd = min(chunkEnd + _chunkSize, data.count)
611-
}
612-
return result
613-
}
614-
#endif
615594

616595
// called to start the event-driven parse. Returns YES in the event of a successful parse, and NO in case of error.
617596
open func parse() -> Bool {
618-
#if os(WASI)
619-
return _data.map { parse(from: $0) } ?? false
620-
#else
621-
XMLParser.setCurrentParser(self)
622-
defer { XMLParser.setCurrentParser(nil) }
623-
624-
if _stream != nil {
625-
return parseFrom(_stream!)
626-
} else if _data != nil {
627-
return parseData(_data!, lastChunkOfData: true)
597+
return Self.withCurrentParser(self) {
598+
if _stream != nil {
599+
return parseFrom(_stream!)
600+
} else if _data != nil {
601+
return parseData(_data!, lastChunkOfData: true)
602+
}
603+
return false
628604
}
629-
630-
return false
631-
#endif
632605
}
633606

634607
// called by the delegate to stop the parse. The delegate will get an error message sent to it.

Tests/Foundation/TestXMLParser.swift

+42-1
Original file line numberDiff line numberDiff line change
@@ -198,5 +198,46 @@ class TestXMLParser : XCTestCase {
198198
ElementNameChecker("noPrefix").check()
199199
ElementNameChecker("myPrefix:myLocalName").check()
200200
}
201-
201+
202+
func testExternalEntity() throws {
203+
class Delegate: XMLParserDelegateEventStream {
204+
override func parserDidStartDocument(_ parser: XMLParser) {
205+
// Start a child parser, updating `currentParser` to the child parser
206+
// to ensure that `currentParser` won't be reset to `nil`, which would
207+
// ignore any external entity related configuration.
208+
let childParser = XMLParser(data: "<child />".data(using: .utf8)!)
209+
XCTAssertTrue(childParser.parse())
210+
super.parserDidStartDocument(parser)
211+
}
212+
}
213+
try withTemporaryDirectory { dir, _ in
214+
let greetingPath = dir.appendingPathComponent("greeting.xml")
215+
try Data("<hello />".utf8).write(to: greetingPath)
216+
let xml = """
217+
<?xml version="1.0" standalone="no"?>
218+
<!DOCTYPE doc [
219+
<!ENTITY greeting SYSTEM "\(greetingPath.absoluteString)">
220+
]>
221+
<doc>&greeting;</doc>
222+
"""
223+
224+
let parser = XMLParser(data: xml.data(using: .utf8)!)
225+
// Explicitly disable external entity resolving
226+
parser.externalEntityResolvingPolicy = .never
227+
let delegate = Delegate()
228+
parser.delegate = delegate
229+
// The parse result changes depending on the libxml2 version
230+
// because of the following libxml2 commit (shipped in libxml2 2.9.10):
231+
// https://gitlab.gnome.org/GNOME/libxml2/-/commit/eddfbc38fa7e84ccd480eab3738e40d1b2c83979
232+
// So we don't check the parse result here.
233+
_ = parser.parse()
234+
XCTAssertEqual(delegate.events, [
235+
.startDocument,
236+
.didStartElement("doc", nil, nil, [:]),
237+
// Should not have parsed the external entity
238+
.didEndElement("doc", nil, nil),
239+
.endDocument,
240+
])
241+
}
242+
}
202243
}

0 commit comments

Comments
 (0)